before gui

2015-07-16 15:10:17 -04:00 · 2015-07-16 15:10:17 -04:00 · 0ca5217ddc
commit 0ca5217ddc
parent 53ee1caeee
2 changed files with 58 additions and 18 deletions
--- a/main.py
+++ b/main.py
@@ -1,9 +1,14 @@
 import os
 import json
+import datetime
+import logging

 import pandas as pd
 import dropbox
 from dateutil.relativedelta import relativedelta
+import easygui
+
+L = logging.getLogger(__name__)

 DROPBOX = False
 local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"
@@ -18,6 +23,15 @@ class AttributionReport(object):
        self.footer_length = footer_length
        self.PI_COLUMN = "PI_Name"
        self.ORG_COLUMN = "Org Name"
+        self.column_order = ["Addgene Assigned",
+                             "Plasmid ID",
+                             "Deposit ID",
+                             "Institute",
+                             "PI Name",
+                             "Date Received",
+                             "Original Date",
+                             "Original ORG",
+                             "Original PI"]

        with open(credentials_file, "r") as cred_f:
            creds = json.loads(cred_f.read())
@@ -53,7 +67,9 @@ class AttributionReport(object):

    def _open_file_frame(self, filename, date_cols):
        if DROPBOX:
+            L.info("Getting file from Dropbox")
            client = dropbox.client.DropboxClient(self.access_token)
+            L.info("Got file from Dropbox")
            f = client.get_file(filename)
        else:
            f = os.path.normpath(local_dropbox_path + filename)
@@ -103,7 +119,8 @@ class AttributionReport(object):

        name_match = filtered_df[filtered_df[filter_column] == filter_value]

-        output = []
+        single = []
+        double = []
        if not name_match.empty:
            for _, row in name_match.iterrows():
                data = {
@@ -117,13 +134,19 @@ class AttributionReport(object):
                    "Original ORG": pi_org,
                    "Original PI": pi_name,
                }
-                output.append(data)
-        return output
+                if (data['Institute'] == data['Original ORG']) and \
+                        (data['PI Name'] == data['Original PI']):
+                    double.append(data)
+                else:
+                    single.append(data)
+        return single, double

    def get_attribution_dataframes(self):
        salesforce, dep = self.get_dataframes()
        name_matches = []
        org_matches = []
+        double_matches = []
+        mismatches = []

        # Iterate through the Salesforce report as the master document
        for index, sf_row in salesforce.iterrows():
@@ -142,26 +165,39 @@ class AttributionReport(object):
            pi_org = sf_row['Account Name']

            # Get matches by the PI's name
-            by_name = self.get_filtered(filtered_df,
-                                        sf_row,
-                                        pi_name,
-                                        pi_org,
-                                        kind="PI")
+            by_name, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="PI")
            name_matches.extend(by_name)
+            mismatches.extend(by_name)
+            double_matches.extend(by_both)

            # Get matches by the organization name
-            by_org = self.get_filtered(filtered_df,
-                                       sf_row,
-                                       pi_name,
-                                       pi_org,
-                                       kind="ORG")
+            by_org, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="ORG")
            org_matches.extend(by_org)
-        return pd.DataFrame(name_matches), pd.DataFrame(org_matches)
+            mismatches.extend(by_org)
+            double_matches.extend(by_both)
+
+        return {"PI": pd.DataFrame(name_matches, columns=self.column_order),
+                "Institute": pd.DataFrame(org_matches, columns=self.column_order),
+                "Double": pd.DataFrame(double_matches, columns=self.column_order),
+                "Single": pd.DataFrame(mismatches, columns=self.column_order)}

    def run(self):
-        name_df, org_df = self.get_attribution_dataframes()
-        name_df.to_excelv("names.xls")
-        org_df.to_excel("orgs.xls")
+        frames = self.get_attribution_dataframes()
+
+        for key, df in frames.items():
+            fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(
+                datetime.date.today(),
+                key)
+            print "Writing", fname
+            writer = pd.ExcelWriter(
+                fname,
+
+                engine='xlsxwriter')
+            df.to_excel(writer,
+                        sheet_name='Sheet1',
+                        index=False)
+            writer.save()
+

 if __name__ == '__main__':
    report = AttributionReport(credentials_file="credentials.json",
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,8 @@
 dropbox
 pandas
 xlrd
-python-dateutil
+python-dateutil
+xlwt
+#openpyxl==1.8.6
+xlsxwriter
+progressbar==2.3