From 0ca5217ddc55e9a7d862ae8d07f8d3ad635fbf78 Mon Sep 17 00:00:00 2001
From: Tyrel Souza <tyrel@addgene.org>
Date: Thu, 16 Jul 2015 15:10:17 -0400
Subject: [PATCH] Split matches into single/double and write one xlsx report per category (checkpoint before GUI)

---
 main.py          | 70 ++++++++++++++++++++++++++++++++++++------------
 requirements.txt |  6 ++++-
 2 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/main.py b/main.py
index f569157..6944483 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,14 @@
 import os
 import json
+import datetime
+import logging
 
 import pandas as pd
 import dropbox
 from dateutil.relativedelta import relativedelta
+import easygui
+
+L = logging.getLogger(__name__)
 
 DROPBOX = False
 local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"
@@ -18,6 +23,15 @@ class AttributionReport(object):
         self.footer_length = footer_length
         self.PI_COLUMN = "PI_Name"
         self.ORG_COLUMN = "Org Name"
+        self.column_order = ["Addgene Assigned",
+                             "Plasmid ID",
+                             "Deposit ID",
+                             "Institute",
+                             "PI Name",
+                             "Date Received",
+                             "Original Date",
+                             "Original ORG",
+                             "Original PI"]
 
         with open(credentials_file, "r") as cred_f:
             creds = json.loads(cred_f.read())
@@ -53,7 +67,9 @@ class AttributionReport(object):
 
     def _open_file_frame(self, filename, date_cols):
         if DROPBOX:
+            L.info("Getting file from Dropbox")
             client = dropbox.client.DropboxClient(self.access_token)
             f = client.get_file(filename)
+            L.info("Got file from Dropbox")
         else:
             f = os.path.normpath(local_dropbox_path + filename)
@@ -103,7 +119,8 @@ class AttributionReport(object):
 
         name_match = filtered_df[filtered_df[filter_column] == filter_value]
 
-        output = []
+        single = []
+        double = []
         if not name_match.empty:
             for _, row in name_match.iterrows():
                 data = {
@@ -117,13 +134,19 @@ class AttributionReport(object):
                     "Original ORG": pi_org,
                     "Original PI": pi_name,
                 }
-                output.append(data)
-        return output
+                if (data['Institute'] == data['Original ORG']) and \
+                        (data['PI Name'] == data['Original PI']):
+                    double.append(data)
+                else:
+                    single.append(data)
+        return single, double
 
     def get_attribution_dataframes(self):
         salesforce, dep = self.get_dataframes()
         name_matches = []
         org_matches = []
+        double_matches = []
+        mismatches = []
 
         # Iterate through the Salesforce report as the master document
         for index, sf_row in salesforce.iterrows():
@@ -142,26 +165,39 @@ class AttributionReport(object):
             pi_org = sf_row['Account Name']
 
             # Get matches by the PI's name
-            by_name = self.get_filtered(filtered_df,
-                                        sf_row,
-                                        pi_name,
-                                        pi_org,
-                                        kind="PI")
+            by_name, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="PI")
             name_matches.extend(by_name)
+            mismatches.extend(by_name)
+            double_matches.extend(by_both)
 
             # Get matches by the organization name
-            by_org = self.get_filtered(filtered_df,
-                                       sf_row,
-                                       pi_name,
-                                       pi_org,
-                                       kind="ORG")
+            by_org, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="ORG")
             org_matches.extend(by_org)
-        return pd.DataFrame(name_matches), pd.DataFrame(org_matches)
+            mismatches.extend(by_org)
+            double_matches.extend(by_both)
+
+        return {"PI": pd.DataFrame(name_matches, columns=self.column_order),
+                "Institute": pd.DataFrame(org_matches, columns=self.column_order),
+                "Double": pd.DataFrame(double_matches, columns=self.column_order),
+                "Single": pd.DataFrame(mismatches, columns=self.column_order)}
 
     def run(self):
-        name_df, org_df = self.get_attribution_dataframes()
-        name_df.to_excelv("names.xls")
-        org_df.to_excel("orgs.xls")
+        frames = self.get_attribution_dataframes()
+
+        for key, df in frames.items():
+            fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(
+                datetime.date.today(),
+                key)
+            print "Writing", fname
+            writer = pd.ExcelWriter(
+                fname,
+
+                engine='xlsxwriter')
+            df.to_excel(writer,
+                        sheet_name='Sheet1',
+                        index=False)
+            writer.save()
+
 
 if __name__ == '__main__':
     report = AttributionReport(credentials_file="credentials.json",
diff --git a/requirements.txt b/requirements.txt
index 5bd0648..041d949 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,8 @@
 dropbox
 pandas
 xlrd
-python-dateutil
\ No newline at end of file
+python-dateutil
+xlwt
+#openpyxl==1.8.6
+xlsxwriter
+progressbar==2.3
\ No newline at end of file