Initial commit
This commit is contained in:
commit
53ee1caeee
170
main.py
Normal file
170
main.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import dropbox
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
|
# When true, report inputs are fetched with the Dropbox client; otherwise they
# are read from the local folder named by ``local_dropbox_path``.
DROPBOX = False
# Local folder that report file names are appended to when DROPBOX is false.
local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"

# Name of the date column used from the Salesforce report.
SF_DATE = "Date"
# Name of the date column used from the deposit data.
DP_DATE = "Date Received"
|
||||||
|
|
||||||
|
|
||||||
|
class AttributionReport(object):
    """Match Salesforce report rows to deposits received shortly afterwards.

    For every row of the Salesforce report, the deposits whose received date
    falls within ``months`` months after the row's date are searched twice:
    once for an equal PI name and once for an equal organization name.  The
    two sets of matches are written to ``names.xls`` and ``orgs.xls``.
    """

    # Text type used to coerce names: ``unicode`` on Python 2, ``str`` on 3.
    try:
        _text_type = unicode
    except NameError:
        _text_type = str

    def __init__(self, credentials_file, months=6, footer_length=None):
        """Load credentials, running the interactive authorization if needed.

        credentials_file -- path of a JSON file with ``app_key`` and
            ``app_secret`` and, once authorized, ``access_token`` and
            ``user_id``.
        months -- length, in months, of the window searched after each
            Salesforce date.
        footer_length -- number of trailing rows to drop from the Salesforce
            report; a falsy value keeps every row.
        """
        # Remembered so authorize() saves the token back to the same file.
        self.credentials_file = credentials_file
        self.months = months
        self.footer_length = footer_length
        self.PI_COLUMN = "PI_Name"
        self.ORG_COLUMN = "Org Name"

        with open(credentials_file, "r") as cred_f:
            creds = json.load(cred_f)

        self.app_key = creds['app_key']
        self.app_secret = creds['app_secret']

        if creds.get("access_token"):
            self.access_token = creds['access_token']
            self.user_id = creds.get('user_id')
        else:
            self.authorize()

    def authorize(self):
        """Obtain an access token interactively and save it for later runs.

        Prints the authorization URL, reads the code the user pastes back,
        stores the resulting token and user id on the instance and writes all
        four credential values to ``self.credentials_file``.
        """
        # NOTE(review): this is the legacy ``dropbox.client`` API, left as
        # written -- confirm it exists in the installed SDK.
        flow = dropbox.client.DropboxOAuth2FlowNoRedirect(self.app_key,
                                                          self.app_secret)
        authorize_url = flow.start()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('1. Go to: ' + authorize_url)
        print('2. Click "Allow" (you might have to log in first)')
        print('3. Copy the authorization code.')

        try:
            read_line = raw_input
        except NameError:
            read_line = input
        code = read_line("Enter the authorization code here: ").strip()

        self.access_token, self.user_id = flow.finish(code)

        creds = {"app_key": self.app_key,
                 "app_secret": self.app_secret,
                 "access_token": self.access_token,
                 "user_id": self.user_id}

        # Save so we don't have to do this again.  Written to the file the
        # credentials were loaded from rather than a fixed name.
        with open(self.credentials_file, "w") as cred_f:
            json.dump(creds, cred_f)

    def _open_file_frame(self, filename, date_cols):
        """Read ``filename`` into a DataFrame, parsing ``date_cols`` as dates.

        The file comes from the Dropbox client when ``DROPBOX`` is true and
        from ``local_dropbox_path`` otherwise.  Names ending in ``.csv`` (any
        letter case) are read as CSV; everything else is read as Excel.
        """
        if DROPBOX:
            client = dropbox.client.DropboxClient(self.access_token)
            source = client.get_file(filename)
        else:
            source = os.path.normpath(local_dropbox_path + filename)

        try:
            if filename.lower().endswith(".csv"):
                return pd.read_csv(source, parse_dates=date_cols,
                                   encoding='utf-8')
            # No ``encoding`` here: newer pandas rejects it for read_excel.
            return pd.read_excel(source, parse_dates=date_cols)
        finally:
            # Only the Dropbox branch yields an object with close(); a local
            # path is a plain string and is skipped.
            close = getattr(source, "close", None)
            if close is not None:
                close()

    @staticmethod
    def _sorted_by(frame, column):
        """Return ``frame`` ordered by ``column`` with a fresh 0..n-1 index."""
        # Use whichever sorting method this pandas installation provides.
        if hasattr(frame, "sort_values"):
            ordered = frame.sort_values(column)
        else:
            ordered = frame.sort(column)
        # Renumber so row labels agree with row positions after sorting.
        return ordered.reset_index(drop=True)

    @staticmethod
    def _as_position(found):
        """Convert a ``searchsorted`` result (scalar or array) to an int."""
        if hasattr(found, "__len__"):
            return int(found[0])
        return int(found)

    @staticmethod
    def _rows_between(frame, column, start, end):
        """Return the rows of ``frame`` with ``start <= frame[column] < end``.

        ``frame`` must already be sorted ascending by ``column``.
        """
        dates = frame[column]
        first = AttributionReport._as_position(dates.searchsorted(start))
        last = AttributionReport._as_position(dates.searchsorted(end))
        # Positional, end-exclusive slice: nothing dated ``end`` or later.
        return frame.iloc[first:last]

    def get_dataframes(self):
        """
        This gets the Salesforce and the Deposit dataframes.
        Then it does some cleanup of the columns.

        Returns ``(salesforce_df, deposit_df)``, each sorted ascending by its
        date column and re-indexed from zero.
        """
        salesforce_data_name = '/Addgene Shared/Dev/Attribution Report/salesforce_report.xlsx'
        salesforce_df = self._open_file_frame(salesforce_data_name, date_cols=[4, 5])

        if self.footer_length:
            # Drop the trailing footer rows before any sorting moves them.
            length_with_footer = len(salesforce_df.index)
            salesforce_df = salesforce_df.head(length_with_footer - self.footer_length)

        deposit_data_name = 'Addgene Shared/Dev/Attribution Report/deposit_data.csv'
        deposit_df = self._open_file_frame(deposit_data_name, date_cols=[7, 8])

        # Clean up Salesforce
        salesforce_df = self._sorted_by(salesforce_df, SF_DATE)
        salesforce_df['Account Description'] = salesforce_df['Account Description'].fillna('')
        salesforce_df["Full Name"] = (
            salesforce_df["First Name"].map(self._text_type)
            + " " + salesforce_df["Last Name"]
        )
        del salesforce_df["First Name"]
        del salesforce_df["Last Name"]

        # Cleanup Deposit Data
        deposit_df = self._sorted_by(deposit_df, DP_DATE)
        deposit_df[self.ORG_COLUMN] = deposit_df[self.ORG_COLUMN].fillna('')
        deposit_df[self.PI_COLUMN] = deposit_df[self.PI_COLUMN].astype(self._text_type)

        return salesforce_df, deposit_df

    def get_filtered(self, filtered_df, sf_row, pi_name, pi_org, kind):
        """Return one dict per row of ``filtered_df`` matching the PI or org.

        filtered_df -- deposit rows to search.
        sf_row -- the Salesforce row the matches are attributed to.
        pi_name, pi_org -- name and organization taken from ``sf_row``.
        kind -- ``"PI"`` to compare ``pi_name`` with the PI column, ``"ORG"``
            to compare ``pi_org`` with the organization column.

        Raises ValueError for any other ``kind``.
        """
        if kind == "PI":
            filter_column = self.PI_COLUMN
            filter_value = pi_name
        elif kind == "ORG":
            filter_column = self.ORG_COLUMN
            filter_value = pi_org
        else:
            raise ValueError("kind must be 'PI' or 'ORG', got %r" % (kind,))

        matches = filtered_df[filtered_df[filter_column] == filter_value]

        return [
            {
                "Addgene Assigned": sf_row['Assigned'],
                "Plasmid ID": row['Plasmid ID'],
                "Deposit ID": row['Deposit ID'],
                "Institute": row[self.ORG_COLUMN],
                "PI Name": row[self.PI_COLUMN],
                "Date Received": row[DP_DATE],
                "Original Date": sf_row[SF_DATE],
                "Original ORG": pi_org,
                "Original PI": pi_name,
            }
            for _, row in matches.iterrows()
        ]

    def get_attribution_dataframes(self):
        """Build the name-match and organization-match DataFrames.

        Returns ``(name_df, org_df)``, holding every ``get_filtered`` result
        for kind ``"PI"`` and kind ``"ORG"`` respectively.
        """
        salesforce, dep = self.get_dataframes()
        name_matches = []
        org_matches = []

        # Iterate through the Salesforce report as the master document
        for _, sf_row in salesforce.iterrows():
            # Get a start date and an end date for filtering.
            start_date = sf_row[SF_DATE]
            end_date = start_date + relativedelta(months=self.months)

            # Filter the deposit data to grab only things within that timeframe.
            in_window = self._rows_between(dep, DP_DATE, start_date, end_date)

            # Variables for short names, and not having to type index a lot.
            pi_name = self._text_type(sf_row['Full Name'])
            pi_org = sf_row['Account Name']

            # Get matches by the PI's name
            name_matches.extend(
                self.get_filtered(in_window, sf_row, pi_name, pi_org, kind="PI"))

            # Get matches by the organization name
            org_matches.extend(
                self.get_filtered(in_window, sf_row, pi_name, pi_org, kind="ORG"))

        return pd.DataFrame(name_matches), pd.DataFrame(org_matches)

    def run(self):
        """Compute both match tables and write them to names.xls / orgs.xls."""
        name_df, org_df = self.get_attribution_dataframes()
        name_df.to_excel("names.xls")
        org_df.to_excel("orgs.xls")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: build the report with its fixed settings (stored
    # credentials file, six-month window, six footer rows) and run it.
    report_options = {
        "credentials_file": "credentials.json",
        "months": 6,
        "footer_length": 6,
    }
    AttributionReport(**report_options).run()
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
dropbox
|
||||||
|
pandas
|
||||||
|
xlrd
|
||||||
|
python-dateutil
|
Loading…
Reference in New Issue
Block a user