diff --git a/main.py b/main.py
index 6944483..cf95cea 100644
--- a/main.py
+++ b/main.py
@@ -2,9 +2,9 @@ import os
 import json
 import datetime
 import logging
+import sys
 
 import pandas as pd
-import dropbox
 from dateutil.relativedelta import relativedelta
 import easygui
 
@@ -21,63 +21,101 @@ class AttributionReport(object):
     def __init__(self, credentials_file, months=6, footer_length=None):
         self.months = months
         self.footer_length = footer_length
+        self.PI_COLUMN = "PI_Name"
         self.ORG_COLUMN = "Org Name"
 
-        self.column_order = ["Addgene Assigned",
-                             "Plasmid ID",
-                             "Deposit ID",
-                             "Institute",
-                             "PI Name",
-                             "Date Received",
-                             "Original Date",
-                             "Original ORG",
-                             "Original PI"]
+        self.OUTPUT_COLUMN_ORDER = ["Addgene Assigned",
+                                    "Plasmid ID",
+                                    "Deposit ID",
+                                    "Institute",
+                                    "PI Name",
+                                    "Date Received",
+                                    "Original Date",
+                                    "Original ORG",
+                                    "Original PI"]
 
-        with open(credentials_file, "r") as cred_f:
-            creds = json.loads(cred_f.read())
+        self.ACCEPTABLE_EXTENSIONS = ["*.csv",
+                                      "*.xls",
+                                      "*.xlsx"]
 
-        self.app_key = creds['app_key']
-        self.app_secret = creds['app_secret']
+        # columns that need to be in the files
+        self.REQUIRED_SF_COLUMNS = ["First Name",
+                                    "Last Name",
+                                    "Account Name",
+                                    "Date",
+                                    "Assigned"]
+        self.REQUIRED_DP_COLUMNS = ["Org Name",
+                                    "Deposit ID",
+                                    "Plasmid ID",
+                                    "PI_Name",
+                                    "Date Received"]
 
-        if not creds.get("access_token", None):
-            self.authorize()
+        # After load and merging, delete these columns
+        self.SF_TRIM_COLUMNS = ["Subject",
+                                "First Name",
+                                "Last Name",
+                                "Created Date",
+                                "LIMS Organization ID",
+                                "Account Description"]
+        self.DP_TRIM_COLUMNS = ["Org ID",
+                                "Deposit Status",
+                                "PI_ID",
+                                "Date Available",
+                                "# Orders",
+                                "# Plasmids in the Deposit",
+                                "Addgene Contact",
+                                "Country"]
+
+        self.DEFAULT_DIR = self.get_dropbox_dir()
+
+    def get_dropbox_dir(self):
+        """
+        Return the local Dropbox folder used as the default dialog location.
+
+        Reads Dropbox's info.json (Windows or Mac location) and prefers the
+        Business account path, then the personal one. Falls back to the
+        user's home directory when Dropbox is not configured or the file
+        cannot be read.
+        """
+        if os.name == "nt":
+            dropbox_file = os.path.join(os.getenv('APPDATA', ''), 'Dropbox', 'info.json')
         else:
-            self.access_token = creds['access_token']
-            self.user_id = creds['user_id']
+            dropbox_file = os.path.expanduser("~/.dropbox/info.json")
+        try:
+            with open(dropbox_file) as dbf:
+                dbconfig = json.loads(dbf.read())
+        except (IOError, ValueError):
+            # Dropbox missing or info.json unreadable: use the home fallback
+            dbconfig = {}
 
-    def authorize(self):
-        flow = dropbox.client.DropboxOAuth2FlowNoRedirect(self.app_key, self.app_secret)
-        authorize_url = flow.start()
-        print '1. Go to: ' + authorize_url
-        print '2. Click "Allow" (you might have to log in first)'
-        print '3. Copy the authorization code.'
-        code = raw_input("Enter the authorization code here: ").strip()
-        access_token, user_id = flow.finish(code)
-        self.access_token = access_token
-        self.user_id = user_id
-
-        creds = {"app_key": self.app_key,
-                 "app_secret": self.app_secret,
-                 "access_token": self.access_token,
-                 "user_id": self.user_id}
-
-        # Save so we don't have to do this again.
-        with open("credentials.json", "w") as f:
-            f.write(json.dumps(creds))
-
-    def _open_file_frame(self, filename, date_cols):
-        if DROPBOX:
-            L.info("Getting file from Dropbox")
-            client = dropbox.client.DropboxClient(self.access_token)
-            L.info("Got file from Dropbox")
-            f = client.get_file(filename)
+        if "business" in dbconfig:
+            dropbox_dir = dbconfig['business']['path']
+        elif "personal" in dbconfig:
+            dropbox_dir = dbconfig['personal']['path']
         else:
-            f = os.path.normpath(local_dropbox_path + filename)
+            dropbox_dir = os.path.expanduser("~")
 
-        if filename[-4:] == ".csv":
-            df = pd.read_csv(f, parse_dates=date_cols, encoding='utf-8')
+        return dropbox_dir
+
+    def _get_dataframe_by_extension(self, path, date_cols):
+        """
+        Load the file at `path` into a DataFrame based on its extension.
+
+        .csv is read with read_csv and .xls/.xlsx with read_excel (matched
+        case-insensitively); `date_cols` is passed as `parse_dates`. Any
+        other extension shows an error dialog and exits with status 1.
+        """
+        ext = os.path.splitext(path)[1].lower()
+
+        if ext == ".csv":
+            df = pd.read_csv(path, parse_dates=date_cols, encoding='utf-8')
+        elif ext in [".xlsx", ".xls"]:
+            df = pd.read_excel(path, parse_dates=date_cols, encoding='utf-8')
         else:
-            df = pd.read_excel(f, parse_dates=date_cols, encoding='utf-8')
+            easygui.msgbox("File was not of type {0}.\nQuitting".format(
+                " ".join(self.ACCEPTABLE_EXTENSIONS)),
+                "ERROR")
+            sys.exit(1)
         return df
 
     def get_dataframes(self):
@@ -85,28 +123,76 @@ class AttributionReport(object):
         This gets the Salesforce and the Deposit dataframes.
         Then it does some cleanup of the columns
         """
-        salesforce_data_name = '/Addgene Shared/Dev/Attribution Report/salesforce_report.xlsx'
-        salesforce_df = self._open_file_frame(salesforce_data_name, date_cols=[4, 5])
+        salesforce_df, deposit_df = self.get_files()
+
         # Get rid of the footer that Salesforce adds.
         if self.footer_length:
             length_with_footer = len(salesforce_df.index)
             salesforce_df = salesforce_df.head(length_with_footer - self.footer_length)
 
-        deposit_data_name = 'Addgene Shared/Dev/Attribution Report/deposit_data.csv'
-        deposit_df = self._open_file_frame(deposit_data_name, date_cols=[7, 8])
-
         # Clean up Salesforce
-        salesforce_df['Account Description'].fillna('', inplace=True)
         salesforce_df.sort(SF_DATE, ascending=1)
         salesforce_df["Full Name"] = salesforce_df["First Name"].map(unicode) + " " + salesforce_df["Last Name"]
-        del salesforce_df["First Name"]
-        del salesforce_df["Last Name"]
 
         # Cleanup Deposit Data
         deposit_df['Org Name'].fillna('', inplace=True)
         deposit_df.sort(DP_DATE, ascending=1)
         deposit_df['PI_Name'].astype(unicode)
 
+        # Cleanup not needed columns (only those the export actually has)
+        for col in set(self.SF_TRIM_COLUMNS) & set(salesforce_df.columns):
+            del salesforce_df[col]
+        for col in set(self.DP_TRIM_COLUMNS) & set(deposit_df.columns):
+            del deposit_df[col]
+
+        return salesforce_df, deposit_df
+
+    def get_files(self):
+        """
+        Ask the user for the Salesforce and Deposit exports and load them.
+
+        Shows a message and exits with status 1 when a dialog is cancelled
+        or a file is missing any of its required columns.
+        Returns (salesforce_df, deposit_df).
+        """
+        salesforce_data_name = easygui.fileopenbox("Salesforce Export",
+                                                   default=os.path.join(self.DEFAULT_DIR, "*.xls"),
+                                                   filetypes=self.ACCEPTABLE_EXTENSIONS)
+        # Cancelled dialog: None per the easygui docs; "." also means no file
+        if not salesforce_data_name or salesforce_data_name == ".":
+            easygui.msgbox("You did not select a Salesforce Export, stopping program.",
+                           "Good Bye")
+            sys.exit(1)
+        salesforce_df = self._get_dataframe_by_extension(salesforce_data_name, date_cols=[4, 5])
+        if set(self.REQUIRED_SF_COLUMNS) <= set(salesforce_df.columns):
+            L.info("Proper columns")
+        else:
+            L.info("Wrong columns")
+            easygui.msgbox("At a minimum, the Salesforce file must have the following columns:\n\n"
+                           "{0}\n\n"
+                           "Please re-run and select a proper file.".format(", ".join(self.REQUIRED_SF_COLUMNS)),
+                           "Incorrect columns")
+            sys.exit(1)
+
+        deposit_data_name = easygui.fileopenbox("Deposit Data",
+                                                default=os.path.join(self.DEFAULT_DIR, "*.xls"),
+                                                filetypes=self.ACCEPTABLE_EXTENSIONS)
+        # Same cancel handling as for the Salesforce export
+        if not deposit_data_name or deposit_data_name == ".":
+            easygui.msgbox("You did not select a Deposit Data Export, stopping program.",
+                           "Good Bye")
+            sys.exit(1)
+        deposit_df = self._get_dataframe_by_extension(deposit_data_name, date_cols=[7, 8])
+        if set(self.REQUIRED_DP_COLUMNS) <= set(deposit_df.columns):
+            L.info("Proper columns")
+        else:
+            L.info("Wrong columns")
+            easygui.msgbox("At a minimum, the Deposit Data file must have the following columns:\n\n"
+                           "{0}\n\n"
+                           "Please re-run and select a proper file.".format(", ".join(self.REQUIRED_DP_COLUMNS)),
+                           "Incorrect columns")
+            sys.exit(1)
+
         return salesforce_df, deposit_df
 
     def get_filtered(self, filtered_df, sf_row, pi_name, pi_org, kind):
@@ -176,31 +262,45 @@ class AttributionReport(object):
             mismatches.extend(by_org)
             double_matches.extend(by_both)
 
-        return {"PI": pd.DataFrame(name_matches, columns=self.column_order),
-                "Institute": pd.DataFrame(org_matches, columns=self.column_order),
-                "Double": pd.DataFrame(double_matches, columns=self.column_order),
-                "Single": pd.DataFrame(mismatches, columns=self.column_order)}
+        return (
+            ("PI", pd.DataFrame(name_matches, columns=self.OUTPUT_COLUMN_ORDER)),
+            ("Institute", pd.DataFrame(org_matches, columns=self.OUTPUT_COLUMN_ORDER)),
+            ("Double", pd.DataFrame(double_matches, columns=self.OUTPUT_COLUMN_ORDER)),
+            ("Single", pd.DataFrame(mismatches, columns=self.OUTPUT_COLUMN_ORDER))
+        )
 
     def run(self):
         frames = self.get_attribution_dataframes()
 
-        for key, df in frames.items():
-            fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(
-                datetime.date.today(),
-                key)
-            print "Writing", fname
-            writer = pd.ExcelWriter(
-                fname,
+        dirname = easygui.diropenbox("Where to save reports?", "Select Report Output Directory", self.DEFAULT_DIR)
+        # No directory chosen: write the reports to the default folder
+        if not dirname:
+            dirname = self.DEFAULT_DIR
 
-                engine='xlsxwriter')
+        for key, df in frames:
+            fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(datetime.date.today(), key)
+
+            xls_path = os.path.join(dirname, fname)
+            print "Writing", xls_path
+
+            writer = pd.ExcelWriter(xls_path,
+                                    engine='xlsxwriter')
 
             df.to_excel(writer, sheet_name='Sheet1', index=False)
 
             writer.save()
 
 
-if __name__ == '__main__':
-    report = AttributionReport(credentials_file="credentials.json",
+def main():
+    """Run the report; unexpected errors are shown in an easygui dialog."""
+    try:
+        report = AttributionReport(credentials_file="credentials.json",
                                months=6,
                                footer_length=6)
-    report.run()
+        report.run()
+    except Exception:
+        # Not a bare except: sys.exit() from the dialogs must still exit
+        easygui.exceptionbox()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 041d949..0b63f90 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ python-dateutil
 xlwt
 #openpyxl==1.8.6
 xlsxwriter
-progressbar==2.3
\ No newline at end of file
+progressbar==2.3
+easygui
\ No newline at end of file