This commit is contained in:
Tyrel Souza 2015-07-16 16:24:08 -04:00
parent 66b5fd6b31
commit 408e2a8de5

74
main.py
View File

@@ -10,61 +10,32 @@ import easygui
L = logging.getLogger(__name__) L = logging.getLogger(__name__)
DROPBOX = False
local_dropbox_path = "/Users/tyrelsouza/Dropbox (Addgene)/"
SF_DATE = "Date"
DP_DATE = "Date Received"
class AttributionReport(object): class AttributionReport(object):
def __init__(self, credentials_file, months=6, footer_length=None): def __init__(self, credentials_file, months=6, footer_length=None):
self.months = months self.months = months
self.footer_length = footer_length self.footer_length = footer_length
self.SF_DATE_COLUMN = "Date"
self.DP_DATE_COLUMN = "Date Received"
self.PI_COLUMN = "PI_Name" self.PI_COLUMN = "PI_Name"
self.ORG_COLUMN = "Org Name" self.ORG_COLUMN = "Org Name"
self.OUTPUT_COLUMN_ORDER = ["Addgene Assigned", # Output the XLSX in this order
"Plasmid ID",
"Deposit ID",
"Institute",
"PI Name",
"Date Received",
"Original Date",
"Original ORG",
"Original PI"]
self.ACCEPTABLE_EXTENSIONS = ["*.csv", self.OUTPUT_COLUMN_ORDER = ["Addgene Assigned", "Plasmid ID", "Deposit ID", "Institute", "PI Name",
"*.xls", "Date Received", "Original Date", "Original ORG", "Original PI"]
"*.xlsx"]
self.ACCEPTABLE_EXTENSIONS = ["*.csv", "*.xls", "*.xlsx"]
# columns that need to be in the files # columns that need to be in the files
self.REQUIRED_SF_COLUMNS = ["First Name", self.REQUIRED_SF_COLUMNS = ["First Name", "Last Name", "Account Name", "Date", "Assigned"]
"Last Name", self.REQUIRED_DP_COLUMNS = ["Org Name", "Deposit ID", "Plasmid ID", "PI_Name", "Date Received"]
"Account Name",
"Date",
"Assigned"]
self.REQUIRED_DP_COLUMNS = ["Org Name",
"Deposit ID",
"Plasmid ID",
"PI_Name",
"Date Received"]
# After load and merging, delete these columns # After load and merging, delete these columns
self.SF_TRIM_COLUMNS = ["Subject", self.SF_TRIM_COLUMNS = ["Subject", "First Name", "Last Name", "Created Date", "LIMS Organization ID",
"First Name",
"Last Name",
"Created Date",
"LIMS Organization ID",
"Account Description"] "Account Description"]
self.DP_TRIM_COLUMNS = ["Org ID", self.DP_TRIM_COLUMNS = ["Org ID", "Deposit Status", "PI_ID", "Date Available", "# Orders",
"Deposit Status", "# Plasmids in the Deposit", "Addgene Contact", "Country"]
"PI_ID",
"Date Available",
"# Orders",
"# Plasmids in the Deposit",
"Addgene Contact",
"Country"]
self.DEFAULT_DIR = self.get_dropbox_dir() self.DEFAULT_DIR = self.get_dropbox_dir()
@@ -119,12 +90,12 @@ class AttributionReport(object):
salesforce_df = salesforce_df.head(length_with_footer - self.footer_length) salesforce_df = salesforce_df.head(length_with_footer - self.footer_length)
# Clean up Salesforce # Clean up Salesforce
salesforce_df.sort(SF_DATE, ascending=1) salesforce_df.sort(self.SF_DATE_COLUMN, ascending=1)
salesforce_df["Full Name"] = salesforce_df["First Name"].map(unicode) + " " + salesforce_df["Last Name"] salesforce_df["Full Name"] = salesforce_df["First Name"].map(unicode) + " " + salesforce_df["Last Name"]
# Cleanup Deposit Data # Cleanup Deposit Data
deposit_df['Org Name'].fillna('', inplace=True) deposit_df['Org Name'].fillna('', inplace=True)
deposit_df.sort(DP_DATE, ascending=1) deposit_df.sort(self.DP_DATE_COLUMN, ascending=1)
deposit_df['PI_Name'].astype(unicode) deposit_df['PI_Name'].astype(unicode)
# Cleanup not needed columns # Cleanup not needed columns
@@ -194,8 +165,8 @@ class AttributionReport(object):
"Deposit ID": row['Deposit ID'], "Deposit ID": row['Deposit ID'],
"Institute": row['Org Name'], "Institute": row['Org Name'],
"PI Name": row['PI_Name'], "PI Name": row['PI_Name'],
"Date Received": row[DP_DATE], "Date Received": row[self.DP_DATE_COLUMN],
"Original Date": sf_row[SF_DATE], "Original Date": sf_row[self.SF_DATE_COLUMN],
"Original ORG": pi_org, "Original ORG": pi_org,
"Original PI": pi_name, "Original PI": pi_name,
} }
@@ -216,11 +187,11 @@ class AttributionReport(object):
# Iterate through the Salesforce report as the master document # Iterate through the Salesforce report as the master document
for index, sf_row in salesforce.iterrows(): for index, sf_row in salesforce.iterrows():
# Get a start date and an end date for filtering. # Get a start date and an end date for filtering.
start_date = sf_row[SF_DATE] start_date = sf_row[self.SF_DATE_COLUMN]
end_date = start_date + relativedelta(months=self.months) end_date = start_date + relativedelta(months=self.months)
start = dep[DP_DATE].searchsorted(start_date)[0] start = dep[self.DP_DATE_COLUMN].searchsorted(start_date)[0]
end = dep[DP_DATE].searchsorted(end_date)[0] end = dep[self.DP_DATE_COLUMN].searchsorted(end_date)[0]
# Filter the deposit data to grab only things within that timeframe. # Filter the deposit data to grab only things within that timeframe.
filtered_df = dep.ix[start:end] filtered_df = dep.ix[start:end]
@@ -272,11 +243,12 @@ class AttributionReport(object):
def main():
    """Build the attribution report and run it, showing failures in a dialog.

    Constructs an ``AttributionReport`` from ``credentials.json`` with
    ``months=6`` and ``footer_length=6`` and calls its ``run()`` method.
    Any ``Exception`` raised along the way is handed to
    ``easygui.exceptionbox()`` rather than propagating to the caller.
    """
    try:
        report = AttributionReport(credentials_file="credentials.json",
                                   months=6,
                                   footer_length=6)
        report.run()
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit derive from BaseException and should still terminate
        # the program instead of being reported as an error dialog.
        easygui.exceptionbox()


# Only run the report when executed as a script, not when imported.
if __name__ == '__main__':
    main()