before gui

This commit is contained in:
Tyrel Souza 2015-07-16 15:10:17 -04:00
parent 53ee1caeee
commit 0ca5217ddc
2 changed files with 58 additions and 18 deletions

70
main.py
View File

@@ -1,9 +1,14 @@
import os import os
import json import json
import datetime
import logging
import pandas as pd import pandas as pd
import dropbox import dropbox
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
import easygui
L = logging.getLogger(__name__)
DROPBOX = False DROPBOX = False
local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/" local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"
@@ -18,6 +23,15 @@ class AttributionReport(object):
self.footer_length = footer_length self.footer_length = footer_length
self.PI_COLUMN = "PI_Name" self.PI_COLUMN = "PI_Name"
self.ORG_COLUMN = "Org Name" self.ORG_COLUMN = "Org Name"
self.column_order = ["Addgene Assigned",
"Plasmid ID",
"Deposit ID",
"Institute",
"PI Name",
"Date Received",
"Original Date",
"Original ORG",
"Original PI"]
with open(credentials_file, "r") as cred_f: with open(credentials_file, "r") as cred_f:
creds = json.loads(cred_f.read()) creds = json.loads(cred_f.read())
@@ -53,7 +67,9 @@ class AttributionReport(object):
def _open_file_frame(self, filename, date_cols): def _open_file_frame(self, filename, date_cols):
if DROPBOX: if DROPBOX:
L.info("Getting file from Dropbox")
client = dropbox.client.DropboxClient(self.access_token) client = dropbox.client.DropboxClient(self.access_token)
L.info("Got file from Dropbox")
f = client.get_file(filename) f = client.get_file(filename)
else: else:
f = os.path.normpath(local_dropbox_path + filename) f = os.path.normpath(local_dropbox_path + filename)
@@ -103,7 +119,8 @@ class AttributionReport(object):
name_match = filtered_df[filtered_df[filter_column] == filter_value] name_match = filtered_df[filtered_df[filter_column] == filter_value]
output = [] single = []
double = []
if not name_match.empty: if not name_match.empty:
for _, row in name_match.iterrows(): for _, row in name_match.iterrows():
data = { data = {
@@ -117,13 +134,19 @@ class AttributionReport(object):
"Original ORG": pi_org, "Original ORG": pi_org,
"Original PI": pi_name, "Original PI": pi_name,
} }
output.append(data) if (data['Institute'] == data['Original ORG']) and \
return output (data['PI Name'] == data['Original PI']):
double.append(data)
else:
single.append(data)
return single, double
def get_attribution_dataframes(self): def get_attribution_dataframes(self):
salesforce, dep = self.get_dataframes() salesforce, dep = self.get_dataframes()
name_matches = [] name_matches = []
org_matches = [] org_matches = []
double_matches = []
mismatches = []
# Iterate through the Salesforce report as the master document # Iterate through the Salesforce report as the master document
for index, sf_row in salesforce.iterrows(): for index, sf_row in salesforce.iterrows():
@@ -142,26 +165,39 @@ class AttributionReport(object):
pi_org = sf_row['Account Name'] pi_org = sf_row['Account Name']
# Get matches by the PI's name # Get matches by the PI's name
by_name = self.get_filtered(filtered_df, by_name, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="PI")
sf_row,
pi_name,
pi_org,
kind="PI")
name_matches.extend(by_name) name_matches.extend(by_name)
mismatches.extend(by_name)
double_matches.extend(by_both)
# Get matches by the organization name # Get matches by the organization name
by_org = self.get_filtered(filtered_df, by_org, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="ORG")
sf_row,
pi_name,
pi_org,
kind="ORG")
org_matches.extend(by_org) org_matches.extend(by_org)
return pd.DataFrame(name_matches), pd.DataFrame(org_matches) mismatches.extend(by_org)
double_matches.extend(by_both)
return {"PI": pd.DataFrame(name_matches, columns=self.column_order),
"Institute": pd.DataFrame(org_matches, columns=self.column_order),
"Double": pd.DataFrame(double_matches, columns=self.column_order),
"Single": pd.DataFrame(mismatches, columns=self.column_order)}
def run(self): def run(self):
name_df, org_df = self.get_attribution_dataframes() frames = self.get_attribution_dataframes()
name_df.to_excelv("names.xls")
org_df.to_excel("orgs.xls") for key, df in frames.items():
fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(
datetime.date.today(),
key)
print "Writing", fname
writer = pd.ExcelWriter(
fname,
engine='xlsxwriter')
df.to_excel(writer,
sheet_name='Sheet1',
index=False)
writer.save()
if __name__ == '__main__': if __name__ == '__main__':
report = AttributionReport(credentials_file="credentials.json", report = AttributionReport(credentials_file="credentials.json",

View File

@@ -1,4 +1,8 @@
dropbox dropbox
pandas pandas
xlrd xlrd
python-dateutil python-dateutil
xlwt
#openpyxl==1.8.6
xlsxwriter
progressbar==2.3