2015-07-17 04:15:20 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2015-07-16 18:13:45 +00:00
|
|
|
import os
|
|
|
|
import json
|
2015-07-16 19:10:17 +00:00
|
|
|
import datetime
|
|
|
|
import logging
|
2015-07-17 14:48:17 +00:00
|
|
|
import subprocess
|
2015-07-16 20:21:22 +00:00
|
|
|
import sys
|
2015-07-16 18:13:45 +00:00
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
from dateutil.relativedelta import relativedelta
|
2015-07-16 19:10:17 +00:00
|
|
|
import easygui
|
|
|
|
|
2015-07-17 14:48:17 +00:00
|
|
|
|
|
|
|
|
2015-07-16 19:10:17 +00:00
|
|
|
L = logging.getLogger(__name__)
|
2015-07-16 18:13:45 +00:00
|
|
|
|
|
|
|
|
2015-07-16 20:52:16 +00:00
|
|
|
def get_dropbox_dir():
    """
    Windows and Mac: get the Dropbox dir glob for Business, or fall back
    to Personal, or finally to the user's home directory.

    Returns a string suitable as an easygui default path: either
    ``<dropbox path>/*.xls`` or the home directory.
    """
    if os.name == "nt":
        # Dropbox keeps its account config under %APPDATA% on Windows.
        dropbox_file = os.path.join(os.getenv('APPDATA'), 'Dropbox', 'info.json')
    else:
        dropbox_file = os.path.expanduser("~/.dropbox/info.json")

    # ROBUSTNESS FIX: the original crashed with IOError/ValueError when
    # Dropbox was not installed or info.json was unreadable.  Fall back to
    # the home directory, mirroring the "no accounts" branch below.
    try:
        with open(dropbox_file) as dbf:
            dbconfig = json.load(dbf)
    except (IOError, OSError, ValueError):
        return os.path.expanduser("~")

    if "business" in dbconfig:
        dropbox_dir = dbconfig['business']['path'] + "/*.xls"
    elif "personal" in dbconfig:
        dropbox_dir = dbconfig['personal']['path'] + "/*.xls"
    else:
        dropbox_dir = os.path.expanduser("~")

    return dropbox_dir
|
|
|
|
|
|
|
|
|
2015-07-16 18:13:45 +00:00
|
|
|
class AttributionReport(object):
    """
    Builds attribution reports by matching a Salesforce contact export
    against Addgene deposit data, then writing de-duplicated XLSX
    summaries.  All file selection and error reporting goes through
    easygui dialogs.
    """

    def __init__(self, months=6, footer_length=None):
        """
        :param months: size (in months) of the attribution window that
            follows each Salesforce contact date.
        :param footer_length: number of footer rows Salesforce appends to
            its export (stripped before processing), or None for no footer.
        """
        self.months = months
        self.footer_length = footer_length

        self.SF_DATE_COLUMN = "Date"
        self.DP_DATE_COLUMN = "Date Received"
        self.PI_COLUMN = "PI_Name"
        self.ORG_COLUMN = "Org Name"

        # Output the XLSX in this order
        self.OUTPUT_COLUMN_ORDER = ["Addgene Assigned", "Plasmid ID", "Deposit ID", "Institute", "PI Name",
                                    "Date Received", "Original Date", "Original ORG", "Original PI"]

        self.ACCEPTABLE_EXTENSIONS = ["*.csv", "*.xls", "*.xlsx"]

        # Columns that need to be in the files.
        self.REQUIRED_SF_COLUMNS = ["First Name", "Last Name", "Account Name", "Date", "Assigned"]
        self.REQUIRED_DP_COLUMNS = ["Org Name", "Deposit ID", "Plasmid ID", "PI_Name", "Date Received"]

        # After load and merging, delete these columns.
        self.SF_TRIM_COLUMNS = ["Subject", "Created Date", "LIMS Organization ID",
                                "Account Description"]
        self.DP_TRIM_COLUMNS = ["Org ID", "Deposit Status", "PI_ID", "Date Available", "# Orders",
                                "# Plasmids in the Deposit", "Addgene Contact", "Country"]

        self.DEFAULT_DIR = get_dropbox_dir()

    def _get_dataframe_by_extension(self, path, date_cols):
        """
        Gets a dataframe either by .csv, or .xls(x),
        or erroring and exiting.

        :param path: file path whose extension selects the pandas reader.
        :param date_cols: column indices to parse as dates.
        """
        _, ext = os.path.splitext(path)

        if ext == ".csv":
            df = pd.read_csv(path, parse_dates=date_cols, encoding='utf-8')
        elif ext in (".xlsx", ".xls"):
            # BUG FIX: read_excel takes no ``encoding`` argument; the
            # original call passed one.
            df = pd.read_excel(path, parse_dates=date_cols)
        else:
            easygui.msgbox("File was not of type {0}.\nQuitting".format(
                " ".join(self.ACCEPTABLE_EXTENSIONS)),
                "ERROR")
            sys.exit(1)
        return df

    def _prompt_and_load(self, title, description, file_label, date_cols, required_columns):
        """
        Prompt for an export file, load it, and validate its columns.
        Exits the program (after a GUI message) on cancel or bad columns.

        :param title: easygui fileopenbox title.
        :param description: phrase used in the "did not select" message.
        :param file_label: phrase used in the "incorrect columns" message.
        """
        path = easygui.fileopenbox(title,
                                   default=self.DEFAULT_DIR,
                                   filetypes=self.ACCEPTABLE_EXTENSIONS)
        # easygui returns "." when the user cancels the dialog.
        if path == ".":
            easygui.msgbox("You did not select a {0}, stopping program.".format(description),
                           "Good Bye")
            sys.exit(1)

        df = self._get_dataframe_by_extension(path, date_cols=date_cols)

        # BUG FIX: the original used a strict-subset test
        # (set(required) < set(columns)), which wrongly rejected files whose
        # columns exactly equaled the required set.
        if set(required_columns).issubset(df.columns):
            L.info("Proper columns")
        else:
            L.info("Wrong columns")
            easygui.msgbox("At a minimum, the {0} file must have the following columns:\n\n"
                           "{1}\n\n"
                           "Please re-run and select a proper file.".format(file_label,
                                                                            ", ".join(required_columns)),
                           "Incorrect columns")
            sys.exit(1)
        return df

    def get_dataframes(self):
        """Prompt for both exports, validate, clean, and return them."""
        salesforce_df = self._prompt_and_load("Salesforce Export",
                                              "Salesforce Export",
                                              "Salesforce",
                                              date_cols=[4, 5],
                                              required_columns=self.REQUIRED_SF_COLUMNS)

        deposit_df = self._prompt_and_load("Deposit Data",
                                           "Deposit Data Export",
                                           "Deposit Data",
                                           date_cols=[7, 8],
                                           required_columns=self.REQUIRED_DP_COLUMNS)

        salesforce_df, deposit_df = self.clean_dataframes(salesforce_df, deposit_df)
        return salesforce_df, deposit_df

    def clean_dataframes(self, salesforce_df, deposit_df):
        """
        Strip the Salesforce footer, sort both frames by their date column,
        normalize deposit fields, and drop columns the report never uses.
        """
        # Get rid of the footer that Salesforce adds.
        if self.footer_length:
            length_with_footer = len(salesforce_df.index)
            salesforce_df = salesforce_df.head(length_with_footer - self.footer_length)

        # BUG FIX: DataFrame.sort returned a sorted *copy* which the original
        # discarded, so neither frame was ever actually sorted -- yet the
        # searchsorted windowing in get_attribution_dataframes requires the
        # deposit date column to be sorted.  sort_values is the modern
        # replacement; assign the result back.
        salesforce_df = salesforce_df.sort_values(self.SF_DATE_COLUMN,
                                                  ascending=True).reset_index(drop=True)

        # Cleanup Deposit Data.  Assign fillna rather than chained
        # inplace=True, which warns (and will stop working) on modern pandas.
        deposit_df['Org Name'] = deposit_df['Org Name'].fillna('')
        deposit_df = deposit_df.sort_values(self.DP_DATE_COLUMN,
                                            ascending=True).reset_index(drop=True)
        # BUG FIX: the original astype(unicode) result was discarded, and
        # ``unicode`` is Python-2-only.  Assign the converted column back.
        deposit_df['PI_Name'] = deposit_df['PI_Name'].astype(str)

        # Cleanup not needed columns; tolerate columns that are already
        # absent rather than crashing with KeyError.
        for col in self.SF_TRIM_COLUMNS:
            if col in salesforce_df.columns:
                del salesforce_df[col]
        for col in self.DP_TRIM_COLUMNS:
            if col in deposit_df.columns:
                del deposit_df[col]

        return salesforce_df, deposit_df

    def get_filtered(self, filtered_df, sf_row, pi_name, pi_org, org=False):
        """
        Assume kind is PI by default.
        Filter where either the PI and PI match, or the Org and Org match.
        If both match, add it to the double list;
        if only one matches, add it to the single list.

        :returns: (single, double) lists of row dicts in
            OUTPUT_COLUMN_ORDER vocabulary.
        """
        filter_column = self.PI_COLUMN
        filter_value = pi_name
        single, double = [], []

        if org:
            filter_column = self.ORG_COLUMN
            filter_value = pi_org

        name_match = filtered_df[filtered_df[filter_column] == filter_value]

        if not name_match.empty:
            for _, row in name_match.iterrows():
                data = {
                    "Addgene Assigned": sf_row['Assigned'],
                    "Plasmid ID": row['Plasmid ID'],
                    "Deposit ID": row['Deposit ID'],
                    "Institute": row['Org Name'],
                    "PI Name": row['PI_Name'],
                    "Date Received": row[self.DP_DATE_COLUMN],
                    "Original Date": sf_row[self.SF_DATE_COLUMN],
                    "Original ORG": pi_org,
                    "Original PI": pi_name,
                }
                # Both PI and Org agree -> "double" match; otherwise single.
                if (data['Institute'] == data['Original ORG']) and \
                        (data['PI Name'] == data['Original PI']):
                    double.append(data)
                else:
                    single.append(data)
        return single, double

    def get_attribution_dataframes(self):
        """
        Walk the Salesforce export and collect deposits attributable to
        each contact within the configured window.

        :returns: tuple of (sheet name, DataFrame) pairs to write out.
        """
        salesforce_df, deposit_df = self.get_dataframes()

        name_matches = []
        org_matches = []
        double_matches = []
        mismatches = []

        # Iterate through the Salesforce report as the master document.
        for index, sf_row in salesforce_df.iterrows():
            # Get a start date and an end date for filtering.
            start_date = sf_row[self.SF_DATE_COLUMN]
            end_date = start_date + relativedelta(months=self.months)

            # COMPAT FIX: Series.searchsorted returns a scalar for scalar
            # input on modern pandas (older versions returned a 1-element
            # array); int() handles both.
            dates = deposit_df[self.DP_DATE_COLUMN]
            start = int(dates.searchsorted(start_date))
            end = int(dates.searchsorted(end_date))

            # Filter the deposit data to grab only things within that
            # timeframe.  BUG FIX: .iloc is positional (matching the
            # positional indices searchsorted returns); the old label-based,
            # endpoint-inclusive .ix slice pulled in one extra row past the
            # window.
            filtered_df = deposit_df.iloc[start:end]

            # Variables for short names, and not having to type index a lot.
            # BUG FIX: the original called .map() on a scalar cell (AttributeError)
            # and used the Python-2-only ``unicode`` builtin.
            pi_name = u"{0} {1}".format(sf_row['First Name'], sf_row['Last Name'])
            pi_org = sf_row['Account Name']

            # Get matches by the PI's name.
            by_name, pi_by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org)
            name_matches.extend(by_name)
            mismatches.extend(by_name)
            double_matches.extend(pi_by_both)

            # Get matches by the organization name.
            by_org, org_by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, org=True)
            org_matches.extend(by_org)
            mismatches.extend(by_org)
            double_matches.extend(org_by_both)

        return (
            ("PI", pd.DataFrame(name_matches, columns=self.OUTPUT_COLUMN_ORDER)),
            # ("Institute", pd.DataFrame(org_matches, columns=self.OUTPUT_COLUMN_ORDER)),
            ("Double", pd.DataFrame(double_matches, columns=self.OUTPUT_COLUMN_ORDER)),
            # ("Single", pd.DataFrame(mismatches, columns=self.OUTPUT_COLUMN_ORDER))
        )

    def run(self):
        """Generate the reports, write them to a chosen directory, and open
        that directory in the platform file browser."""
        frames = self.get_attribution_dataframes()

        self.dirname = easygui.diropenbox("Where to save reports?", "Select Report Output Directory", self.DEFAULT_DIR)
        if not self.dirname:
            self.dirname = self.DEFAULT_DIR

        for key, df in frames:
            fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(datetime.date.today(), key)
            xls_path = os.path.join(self.dirname, fname)

            deduped_df = df.drop_duplicates()

            with pd.ExcelWriter(xls_path, engine='xlsxwriter') as writer:
                deduped_df.to_excel(writer, sheet_name='Sheet1', index=False)

        if os.name == "nt":
            # SECURITY/ROBUSTNESS FIX: pass an argument list instead of an
            # interpolated shell=True string (paths with spaces/metacharacters
            # broke the old form).
            subprocess.call(["explorer", self.dirname])
        else:
            # Open (reveal) the last written path in Finder.
            subprocess.call(["open", "-R", xls_path])
|
|
|
|
|
|
|
|
|
2015-07-16 20:21:22 +00:00
|
|
|
def main():
    """Build and save the attribution reports, surfacing any failure in a
    GUI exception box instead of a console traceback."""
    try:
        report = AttributionReport(months=6, footer_length=6)
        report.run()
        # BUG FIX: user-facing typo ("your file are saved").
        easygui.msgbox("Done, your files are saved where you chose.", "Done!")
    except Exception:
        # BUG FIX: narrowed from a bare ``except:`` so that SystemExit
        # (the deliberate sys.exit(1) cancel paths inside the report) and
        # KeyboardInterrupt are no longer swallowed and shown as errors.
        easygui.exceptionbox()
|
|
|
|
|
2015-07-16 20:24:08 +00:00
|
|
|
|
2015-07-16 20:21:22 +00:00
|
|
|
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()
|