before gui

2015-07-16 15:10:17 -04:00 · 2015-07-16 15:10:17 -04:00 · 0ca5217ddc
commit 0ca5217ddc
parent 53ee1caeee
2 changed files with 58 additions and 18 deletions
--- a/main.py
+++ b/main.py
@ -1,9 +1,14 @@
 import os
 import json
 import datetime
 import logging
 import pandas as pd
 import dropbox
 from dateutil.relativedelta import relativedelta
 import easygui
 L = logging.getLogger(__name__)
 DROPBOX = False
 local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"
@ -18,6 +23,15 @@ class AttributionReport(object):
        self.footer_length = footer_length
        self.PI_COLUMN = "PI_Name"
        self.ORG_COLUMN = "Org Name"
        self.column_order = ["Addgene Assigned",
                             "Plasmid ID",
                             "Deposit ID",
                             "Institute",
                             "PI Name",
                             "Date Received",
                             "Original Date",
                             "Original ORG",
                             "Original PI"]
        with open(credentials_file, "r") as cred_f:
            creds = json.loads(cred_f.read())
@ -53,7 +67,9 @@ class AttributionReport(object):
    def _open_file_frame(self, filename, date_cols):
        if DROPBOX:
            L.info("Getting file from Dropbox")
            client = dropbox.client.DropboxClient(self.access_token)
            L.info("Got file from Dropbox")
            f = client.get_file(filename)
        else:
            f = os.path.normpath(local_dropbox_path + filename)
@ -103,7 +119,8 @@ class AttributionReport(object):
        name_match = filtered_df[filtered_df[filter_column] == filter_value]
-        output = []
+        single = []
        double = []
        if not name_match.empty:
            for _, row in name_match.iterrows():
                data = {
@ -117,13 +134,19 @@ class AttributionReport(object):
                    "Original ORG": pi_org,
                    "Original PI": pi_name,
                }
-                output.append(data)
+                if (data['Institute'] == data['Original ORG']) and \
-        return output
+                        (data['PI Name'] == data['Original PI']):
                    double.append(data)
                else:
                    single.append(data)
        return single, double
    def get_attribution_dataframes(self):
        salesforce, dep = self.get_dataframes()
        name_matches = []
        org_matches = []
        double_matches = []
        mismatches = []
        # Iterate through the Salesforce report as the master document
        for index, sf_row in salesforce.iterrows():
@ -142,26 +165,39 @@ class AttributionReport(object):
            pi_org = sf_row['Account Name']
            # Get matches by the PI's name
-            by_name = self.get_filtered(filtered_df,
+            by_name, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="PI")
                                        sf_row,
                                        pi_name,
                                        pi_org,
                                        kind="PI")
            name_matches.extend(by_name)
            mismatches.extend(by_name)
            double_matches.extend(by_both)
            # Get matches by the organization name
-            by_org = self.get_filtered(filtered_df,
+            by_org, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="ORG")
                                       sf_row,
                                       pi_name,
                                       pi_org,
                                       kind="ORG")
            org_matches.extend(by_org)
-        return pd.DataFrame(name_matches), pd.DataFrame(org_matches)
+            mismatches.extend(by_org)
            double_matches.extend(by_both)
        return {"PI": pd.DataFrame(name_matches, columns=self.column_order),
                "Institute": pd.DataFrame(org_matches, columns=self.column_order),
                "Double": pd.DataFrame(double_matches, columns=self.column_order),
                "Single": pd.DataFrame(mismatches, columns=self.column_order)}
    def run(self):
-        name_df, org_df = self.get_attribution_dataframes()
+        frames = self.get_attribution_dataframes()
-        name_df.to_excelv("names.xls")
+
-        org_df.to_excel("orgs.xls")
+        for key, df in frames.items():
            fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(
                datetime.date.today(),
                key)
            print "Writing", fname
            writer = pd.ExcelWriter(
                fname,
                engine='xlsxwriter')
            df.to_excel(writer,
                        sheet_name='Sheet1',
                        index=False)
            writer.save()
 if __name__ == '__main__':
    report = AttributionReport(credentials_file="credentials.json",
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,8 @@
 dropbox
 pandas
 xlrd
-python-dateutil
+python-dateutil
 xlwt
 #openpyxl==1.8.6
 xlsxwriter
 progressbar==2.3