From 0ca5217ddc55e9a7d862ae8d07f8d3ad635fbf78 Mon Sep 17 00:00:00 2001 From: Tyrel Souza Date: Thu, 16 Jul 2015 15:10:17 -0400 Subject: [PATCH] before gui --- main.py | 70 ++++++++++++++++++++++++++++++++++++------------ requirements.txt | 6 ++++- 2 files changed, 58 insertions(+), 18 deletions(-) diff --git a/main.py b/main.py index f569157..6944483 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,14 @@ import os import json +import datetime +import logging import pandas as pd import dropbox from dateutil.relativedelta import relativedelta +import easygui + +L = logging.getLogger(__name__) DROPBOX = False local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/" @@ -18,6 +23,15 @@ class AttributionReport(object): self.footer_length = footer_length self.PI_COLUMN = "PI_Name" self.ORG_COLUMN = "Org Name" + self.column_order = ["Addgene Assigned", + "Plasmid ID", + "Deposit ID", + "Institute", + "PI Name", + "Date Received", + "Original Date", + "Original ORG", + "Original PI"] with open(credentials_file, "r") as cred_f: creds = json.loads(cred_f.read()) @@ -53,7 +67,9 @@ class AttributionReport(object): def _open_file_frame(self, filename, date_cols): if DROPBOX: + L.info("Getting file from Dropbox") client = dropbox.client.DropboxClient(self.access_token) + L.info("Got file from Dropbox") f = client.get_file(filename) else: f = os.path.normpath(local_dropbox_path + filename) @@ -103,7 +119,8 @@ class AttributionReport(object): name_match = filtered_df[filtered_df[filter_column] == filter_value] - output = [] + single = [] + double = [] if not name_match.empty: for _, row in name_match.iterrows(): data = { @@ -117,13 +134,19 @@ class AttributionReport(object): "Original ORG": pi_org, "Original PI": pi_name, } - output.append(data) - return output + if (data['Institute'] == data['Original ORG']) and \ + (data['PI Name'] == data['Original PI']): + double.append(data) + else: + single.append(data) + return single, double def get_attribution_dataframes(self): salesforce, dep = self.get_dataframes() name_matches = [] org_matches = [] + double_matches = [] + mismatches = [] # Iterate through the Salesforce report as the master document for index, sf_row in salesforce.iterrows(): @@ -142,26 +165,39 @@ class AttributionReport(object): pi_org = sf_row['Account Name'] # Get matches by the PI's name - by_name = self.get_filtered(filtered_df, - sf_row, - pi_name, - pi_org, - kind="PI") + by_name, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="PI") name_matches.extend(by_name) + mismatches.extend(by_name) + double_matches.extend(by_both) # Get matches by the organization name - by_org = self.get_filtered(filtered_df, - sf_row, - pi_name, - pi_org, - kind="ORG") + by_org, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="ORG") org_matches.extend(by_org) - return pd.DataFrame(name_matches), pd.DataFrame(org_matches) + mismatches.extend(by_org) + double_matches.extend(by_both) + + return {"PI": pd.DataFrame(name_matches, columns=self.column_order), + "Institute": pd.DataFrame(org_matches, columns=self.column_order), + "Double": pd.DataFrame(double_matches, columns=self.column_order), + "Single": pd.DataFrame(mismatches, columns=self.column_order)} def run(self): - name_df, org_df = self.get_attribution_dataframes() - name_df.to_excelv("names.xls") - org_df.to_excel("orgs.xls") + frames = self.get_attribution_dataframes() + + for key, df in frames.items(): + fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format( + datetime.date.today(), + key) + print "Writing", fname + writer = pd.ExcelWriter( + fname, + + engine='xlsxwriter') + df.to_excel(writer, + sheet_name='Sheet1', + index=False) + writer.save() + if __name__ == '__main__': report = AttributionReport(credentials_file="credentials.json", diff --git a/requirements.txt b/requirements.txt index 5bd0648..041d949 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,8 @@ dropbox pandas xlrd -python-dateutil \ No newline at end of file +python-dateutil +xlwt +#openpyxl==1.8.6 +xlsxwriter +progressbar==2.3 \ No newline at end of file