before gui

This commit is contained in:
Tyrel Souza 2015-07-16 15:10:17 -04:00
parent 53ee1caeee
commit 0ca5217ddc
2 changed files with 58 additions and 18 deletions

70
main.py
View File

@ -1,9 +1,14 @@
import os
import json
import datetime
import logging
import pandas as pd
import dropbox
from dateutil.relativedelta import relativedelta
import easygui
L = logging.getLogger(__name__)
DROPBOX = False
local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"
@ -18,6 +23,15 @@ class AttributionReport(object):
self.footer_length = footer_length
self.PI_COLUMN = "PI_Name"
self.ORG_COLUMN = "Org Name"
self.column_order = ["Addgene Assigned",
"Plasmid ID",
"Deposit ID",
"Institute",
"PI Name",
"Date Received",
"Original Date",
"Original ORG",
"Original PI"]
with open(credentials_file, "r") as cred_f:
creds = json.loads(cred_f.read())
@ -53,7 +67,9 @@ class AttributionReport(object):
def _open_file_frame(self, filename, date_cols):
if DROPBOX:
L.info("Getting file from Dropbox")
client = dropbox.client.DropboxClient(self.access_token)
L.info("Got file from Dropbox")
f = client.get_file(filename)
else:
f = os.path.normpath(local_dropbox_path + filename)
@ -103,7 +119,8 @@ class AttributionReport(object):
name_match = filtered_df[filtered_df[filter_column] == filter_value]
output = []
single = []
double = []
if not name_match.empty:
for _, row in name_match.iterrows():
data = {
@ -117,13 +134,19 @@ class AttributionReport(object):
"Original ORG": pi_org,
"Original PI": pi_name,
}
output.append(data)
return output
if (data['Institute'] == data['Original ORG']) and \
(data['PI Name'] == data['Original PI']):
double.append(data)
else:
single.append(data)
return single, double
def get_attribution_dataframes(self):
salesforce, dep = self.get_dataframes()
name_matches = []
org_matches = []
double_matches = []
mismatches = []
# Iterate through the Salesforce report as the master document
for index, sf_row in salesforce.iterrows():
@ -142,26 +165,39 @@ class AttributionReport(object):
pi_org = sf_row['Account Name']
# Get matches by the PI's name
by_name = self.get_filtered(filtered_df,
sf_row,
pi_name,
pi_org,
kind="PI")
by_name, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="PI")
name_matches.extend(by_name)
mismatches.extend(by_name)
double_matches.extend(by_both)
# Get matches by the organization name
by_org = self.get_filtered(filtered_df,
sf_row,
pi_name,
pi_org,
kind="ORG")
by_org, by_both = self.get_filtered(filtered_df, sf_row, pi_name, pi_org, kind="ORG")
org_matches.extend(by_org)
return pd.DataFrame(name_matches), pd.DataFrame(org_matches)
mismatches.extend(by_org)
double_matches.extend(by_both)
return {"PI": pd.DataFrame(name_matches, columns=self.column_order),
"Institute": pd.DataFrame(org_matches, columns=self.column_order),
"Double": pd.DataFrame(double_matches, columns=self.column_order),
"Single": pd.DataFrame(mismatches, columns=self.column_order)}
def run(self):
    """Write each attribution DataFrame to its own Excel workbook.

    Calls ``self.get_attribution_dataframes()``, which returns a dict
    mapping a match-kind label to a DataFrame, and writes one ``.xlsx``
    file per entry. Each file is named
    ``<today>_Attribution_Report_<label>_Match.xlsx``; the name is
    relative, so it lands in the current working directory. The data
    goes on a sheet called ``Sheet1`` without the index column.
    """
    frames = self.get_attribution_dataframes()
    for key, df in frames.items():
        fname = '{0}_Attribution_Report_{1}_Match.xlsx'.format(
            datetime.date.today(),
            key)
        # Single-argument call form prints identically whether ``print``
        # is a statement (Python 2) or a function (Python 3).
        print("Writing {0}".format(fname))
        # The context manager finalises the workbook even when
        # ``to_excel`` raises, replacing the explicit ``writer.save()``.
        with pd.ExcelWriter(fname, engine='xlsxwriter') as writer:
            df.to_excel(writer,
                        sheet_name='Sheet1',
                        index=False)
if __name__ == '__main__':
report = AttributionReport(credentials_file="credentials.json",

View File

@ -1,4 +1,8 @@
dropbox
pandas
xlrd
python-dateutil
python-dateutil
xlwt
#openpyxl==1.8.6
xlsxwriter
progressbar==2.3