diff --git a/__pycache__/inputManager.cpython-39.pyc b/__pycache__/inputManager.cpython-39.pyc
new file mode 100644
index 0000000..75fe8d5
Binary files /dev/null and b/__pycache__/inputManager.cpython-39.pyc differ
diff --git a/__pycache__/inputPutManager.cpython-39.pyc b/__pycache__/inputPutManager.cpython-39.pyc
new file mode 100644
index 0000000..94dd7b7
Binary files /dev/null and b/__pycache__/inputPutManager.cpython-39.pyc differ
diff --git a/__pycache__/onBaseData.cpython-39.pyc b/__pycache__/onBaseData.cpython-39.pyc
new file mode 100644
index 0000000..92ff618
Binary files /dev/null and b/__pycache__/onBaseData.cpython-39.pyc differ
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..362e974
--- /dev/null
+++ b/config.json
@@ -0,0 +1,5 @@
+{
+    "template": "_TEMPLATE_ACHVerReport.xlsx",
+    "output_dir": "../",
+    "sql_query": "OnBaseSearchQuery.txt"
+}
\ No newline at end of file
diff --git a/inputManager.py b/inputManager.py
new file mode 100644
index 0000000..82af968
--- /dev/null
+++ b/inputManager.py
@@ -0,0 +1,29 @@
+from datetime import datetime as dt
+import re
+
+def get_login() -> str:
+    """
+    Gets the user's LEAF login info and returns it as "username:password"
+    """
+    print("NOTE: This program requires the user to have read access to: LPP-SQL01.Onbase")
+    un = input("What is your LEAF login name? ")
+    pw = input("What is your LEAF password? ")
+    loginStr = f"{un.lower()}:{pw}"
+    return loginStr
+
+def get_timeframe(startDate: str = "09/27/2022", endDate: str = dt.now().strftime("%m/%d/%Y")):
+    print(f"""\nCurrent report timeframe:
+Start Date: {startDate}\nEnd Date: {endDate}""")
+    edit = input("Would you like to edit this? (y/n): ").lower()
+    while edit not in ('y', 'n'):
+        print(f"'{edit}' is not a valid option.")
+        edit = input("Please enter y or n: ")
+    if edit == 'y':
+        startDate = input("Start Date (mm/dd/yyyy): ")
+        while re.search(r"\d{2}/\d{2}/\d{4}", startDate) is None:
+            startDate = input("Please enter a start date with the following format: mm/dd/yyyy\n")
+        endDate = input("End Date (mm/dd/yyyy): ")
+        while re.search(r"\d{2}/\d{2}/\d{4}", endDate) is None:
+            endDate = input("Please enter an end date with the following format: mm/dd/yyyy\n")
+        return get_timeframe(startDate, endDate)
+    return startDate, endDate
\ No newline at end of file
diff --git a/main.py b/main.py
index 13a1f44..c975e55 100644
--- a/main.py
+++ b/main.py
@@ -1,252 +1,28 @@
+import openpyxl as pxl
+from openpyxl import load_workbook
 import pandas as pd
+from datetime import datetime as dt
 from pprint import pprint as prt
-import sqlalchemy as sqa
-from datetime import timedelta, time,datetime as dt
-import numpy as np
-import businesstimedelta
-import pytz
+import os
+import json
+
+# Custom modules
+import onBaseData
+import inputManager
+
+with open('config.json') as json_file:
+    config = json.load(json_file)
+
-workday = businesstimedelta.WorkDayRule(
-    start_time= time(7),
-    end_time= time(18),
-    working_days=[0, 1, 2, 3, 4],
-    tz=pytz.timezone("US/Eastern"))
+with open(config["sql_query"]) as sqlQFile:
+    sqlQuery = sqlQFile.read()
+
-businesshrs = businesstimedelta.Rules([workday])
+loginStr = inputManager.get_login()
+startDate, endDate = inputManager.get_timeframe(startDate= "09/27/2022", endDate= dt.now().strftime("%m/%d/%Y"))
+rawData = onBaseData.get_data(login=loginStr, startDate=startDate, endDate=endDate, sqlQuery=sqlQuery)
+fullData = onBaseData.initial_data_processing(raw_report= rawData)
+
-def pfd(dataframe):
-    with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
-        prt(dataframe)
+newReport = f"ACHVerificationReport {dt.now().strftime('%m-%d-%y')}.xlsx" +os.system(f"cp {config['template']} {config['output_dir']}'{newReport}'") -def login() -> str: - """ - Logs get's login info - """ - un = input("What is your LEAF login name? ") - pw = input("What is your LEAF password? ") - login_str = f"{un.lower()}:{pw}" - return login_str - -def get_timeframe(): - start_date = "09/27/2022" - end_date = dt.now().strftime("%m/%d/%y") - return start_date, end_date - -def get_data() -> pd.DataFrame: - connection_str = f"mssql+pymssql://leafnow.com\{login()}@LPP-SQL01" - try: - print(f"Connectiong to SQL database...") - with sqa.create_engine(connection_str).connect() as con: - start_date, end_date = get_timeframe() - print("Pulling data...") - query = f""" - use Onbase -select ---id.itemnum, -RTRIM(ki105.keyvaluechar) as ApplicationNum -,RTRIM(ki103.keyvaluechar) as CustName -,RTRIM(ki136.keyvaluechar) as SubmittedBy -,Rtrim(ki354.keyvaluechar) as MarketingRepEmail -,RTRIM(ki1076.keyvaluechar) as Status -,RTRIM(ki1456.keyvaluechar) as APRep -,RTRIM(ki1457.keyvaluetod) as StatusDateTime -,RTRIM(kgd426.kg749) as vendornum -,RTRIM(kgd426.kg750) as vendorname -,RTRIM(kgd426.kg1388) as firstattempt -,kgd426.kg1454 as firstattemptdate -,RTRIM(kgd426.kg1452) as firstattemptcomments -,RTRIM(kgd426.kg1389) as secondattempt -,kgd426.kg1455 as secondattemptdate -,RTRIM(kgd426.kg1453) as secondattemptcomments -,RTRIM(kgd426.kg1075) as leafemployee -,RTRIM(kgd426.kg1074) as vendorverifiedwith -,min(wf101.entrytime) as QueueEntryTime -,max(wf101.exittime) as QueueExitTime - - -from hsi.itemdata id ---join hsi.doctype dt on dt.itemtypenum = id.itemtypenum -join hsi.keyrecorddata426 kgd426 on kgd426.itemnum = id.itemnum -left outer join hsi.keyitem105 ki105 on ki105.itemnum = id.itemnum -left outer join hsi.keyitem103 ki103 on ki103.itemnum = id.itemnum -left outer join hsi.keyitem136 ki136 on ki136.itemnum = id.itemnum -left outer join hsi.keyitem354 ki354 on ki354.itemnum = id.itemnum -left outer join hsi.keyitem1076 ki1076 on ki1076.itemnum = id.itemnum -left outer join hsi.keyitem1457 ki1457 on ki1457.itemnum = id.itemnum -left outer join hsi.keyitem1456 ki1456 on ki1456.itemnum = id.itemnum -left outer join hsi.keyitem749 ki749 on ki749.itemnum = id.itemnum -left outer join hsi.keyitem750 ki750 on ki750.itemnum = id.itemnum -left outer join hsi.keyitem1388 ki1388 on ki1388.itemnum = id.itemnum -left outer join hsi.keyitem1454 ki1454 on ki1454.itemnum = id.itemnum -left outer join hsi.keyitem1452 ki1452 on ki1452.itemnum = id.itemnum -left outer join hsi.keyitem1389 ki1389 on ki1389.itemnum = id.itemnum -left outer join hsi.keyitem1455 ki1455 on ki1455.itemnum = id.itemnum -left outer join hsi.keyitem1453 ki1453 on ki1453.itemnum = id.itemnum -left outer join hsi.keyitem1075 ki1075 on ki1075.itemnum = id.itemnum -left outer join hsi.keyitem1074 ki1074 on ki1074.itemnum = id.itemnum ---PHL LifeCycle = 101; ACH Queue = 405 -join hsi.wflog wf101 on wf101.itemnum = id.itemnum and wf101.lcnum = '101' and wf101.statenum = '405' - -where id.itemtypenum = 535 -and id.status = 0 -and CONVERT(DATE,ki1457.keyvaluetod) BETWEEN '{start_date}' and '{end_date}' - -group by -ki105.keyvaluechar -,ki103.keyvaluechar -,ki136.keyvaluechar -,ki354.keyvaluechar -,ki1076.keyvaluechar -,ki1456.keyvaluechar -,ki1457.keyvaluetod -,kgd426.kg749 -,kgd426.kg750 -,kgd426.kg1388 -,kgd426.kg1454 -,kgd426.kg1452 -,kgd426.kg1389 -,kgd426.kg1455 -,kgd426.kg1453 -,kgd426.kg1075 -,kgd426.kg1074 - -UNION - -select ---id.itemnum, 
-RTRIM(ki105.keyvaluechar) as ApplicationNum
-,RTRIM(ki103.keyvaluechar) as CustName
-,RTRIM(ki136.keyvaluechar) as SubmittedBy
-,Rtrim(ki354.keyvaluechar) as MarketingRepEmail
-,RTRIM(ki1076.keyvaluechar) as Status
-,RTRIM(ki1456.keyvaluechar) as APRep
-,RTRIM(ki1457.keyvaluetod) as StatusDateTime
-,RTRIM(kgd426.kg749) as vendornum
-,RTRIM(kgd426.kg750) as vendorname
-,RTRIM(kgd426.kg1388) as firstattempt
-,RTRIM(kgd426.kg1454) as firstattemptdate
-,RTRIM(kgd426.kg1452) as firstattemptcomments
-,RTRIM(kgd426.kg1389) as secondattempt
-,RTRIM(kgd426.kg1455) as secondattemptdate
-,RTRIM(kgd426.kg1453) as secondattemptcomments
-,RTRIM(kgd426.kg1075) as leafemployee
-,RTRIM(kgd426.kg1074) as vendorverifiedwith
-,min(wf106.entrytime) as QueueEntryTime
-,max(wf106.exittime) as QueueExitTime
-
-
-from hsi.itemdata id
---join hsi.doctype dt on dt.itemtypenum = id.itemtypenum
-join hsi.keyrecorddata426 kgd426 on kgd426.itemnum = id.itemnum
-left outer join hsi.keyitem105 ki105 on ki105.itemnum = id.itemnum
-left outer join hsi.keyitem103 ki103 on ki103.itemnum = id.itemnum
-left outer join hsi.keyitem136 ki136 on ki136.itemnum = id.itemnum
-left outer join hsi.keyitem354 ki354 on ki354.itemnum = id.itemnum
-left outer join hsi.keyitem1076 ki1076 on ki1076.itemnum = id.itemnum
-left outer join hsi.keyitem1457 ki1457 on ki1457.itemnum = id.itemnum
-left outer join hsi.keyitem1456 ki1456 on ki1456.itemnum = id.itemnum
-left outer join hsi.keyitem749 ki749 on ki749.itemnum = id.itemnum
-left outer join hsi.keyitem750 ki750 on ki750.itemnum = id.itemnum
-left outer join hsi.keyitem1388 ki1388 on ki1388.itemnum = id.itemnum
-left outer join hsi.keyitem1454 ki1454 on ki1454.itemnum = id.itemnum
-left outer join hsi.keyitem1452 ki1452 on ki1452.itemnum = id.itemnum
-left outer join hsi.keyitem1389 ki1389 on ki1389.itemnum = id.itemnum
-left outer join hsi.keyitem1455 ki1455 on ki1455.itemnum = id.itemnum
-left outer join hsi.keyitem1453 ki1453 on ki1453.itemnum = id.itemnum
-left outer join hsi.keyitem1075 ki1075 on ki1075.itemnum = id.itemnum
-left outer join hsi.keyitem1074 ki1074 on ki1074.itemnum = id.itemnum
---MOB LifeCycle = 106; ACH Queue = 417
-join hsi.wflog wf106 on wf106.itemnum = id.itemnum and wf106.lcnum = '106' and wf106.statenum = '417'
-
-where id.itemtypenum = 535
-and id.status = 0
-and CONVERT(DATE,ki1457.keyvaluetod) BETWEEN '09/27/2022' and '12/06/2022'
-
-group by
-ki105.keyvaluechar
-,ki103.keyvaluechar
-,ki136.keyvaluechar
-,ki354.keyvaluechar
-,ki1076.keyvaluechar
-,ki1456.keyvaluechar
-,ki1457.keyvaluetod
-,kgd426.kg749
-,kgd426.kg750
-,kgd426.kg1388
-,kgd426.kg1454
-,kgd426.kg1452
-,kgd426.kg1389
-,kgd426.kg1455
-,kgd426.kg1453
-,kgd426.kg1075
-,kgd426.kg1074
-Order by 1
-            """
-            try:
-                result = con.execute(query).all()
-                try:
-                    dataframe = pd.DataFrame(result)
-                    return dataframe
-                except:
-                    print(f"Failed to create a dataframe from SQL result:\n{result}")
-            except Exception as e:
-                print(f"Failed to pull data from SQL:\n{query}\n{e}")
-    except Exception as e:
-        print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!")
-
-def stats_for_col(columns: str, df: pd.DataFrame) -> pd.DataFrame:
-    return pd.DataFrame({
-        "Data" : [col for col in columns],
-        "Mean": [df[col].mean() for col in columns],
-        "Median": [df[col].median() for col in columns],
-        "Max": [df[col].max() for col in columns],
-        "Std": [df[col].std() for col in columns]
-    })
-
-def process_data(raw_report: pd.DataFrame) -> pd.DataFrame:
-
-    # Convert columns to datetime]
date_time_format = "%Y-%m-%d %H:%M:%S.%f" - raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p") - raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format) - raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format) - raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format) - raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format) - raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2 - if row.QueueExitTime > dt(1965,1,1) - and row.QueueExitTime > row.QueueEntryTime - else None, axis = 1) - raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2 - if - (row.secondattempt != None and row.firstattempt != None) and - (row.secondattemptdate > row.firstattemptdate) - else None, axis = 1) - raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2 - if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1) - else None, axis = 1) - - unique_deals = len(raw_report["ApplicationNum"].unique()) - ver_on_first = len(raw_report.query("firstattempt == 'VERIFICATION COMPLETED'")) - verified = len(raw_report.query("Status == 'VERIFICATION COMPLETED'")) - - failed_on_first = len(raw_report.query("firstattempt != 'VERIFICATION COMPLETED' & firstattempt != 'VERIFICATION IN PROCESS'")) - - col_stats = stats_for_col(["APTurnAround", "AttemptTimeDif", "TimeToFirstAttempt"], raw_report) - prt(col_stats) - print(f"\n# of deals: {unique_deals} | # payments {len(raw_report)}") - print(f"Verified on first: {ver_on_first} ({round(ver_on_first/len(raw_report),4) *100}%) | Failed on first: {failed_on_first} ({round(failed_on_first/len(raw_report),4) *100}%)") - - pt_by_ap_rep = pd.pivot_table(raw_report, index="APRep", values= ["APTurnAround","AttemptTimeDif","TimeToFirstAttempt"],aggfunc = [np.mean, np.median, np.max]) - prt(pt_by_ap_rep) - - return raw_report - - -ach_raw_report = get_data() -prt(ach_raw_report) -#pfd(ach_raw_report) -report_plus = process_data(ach_raw_report) -prt(report_plus) -report_plus.to_excel("test3.xlsx",index=False) +with pd.ExcelWriter(config['output_dir']+newReport, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer: + fullData.to_excel(writer, sheet_name="raw_data") \ No newline at end of file diff --git a/onBaseData.py b/onBaseData.py new file mode 100644 index 0000000..e28d988 --- /dev/null +++ b/onBaseData.py @@ -0,0 +1,84 @@ +import pandas as pd +import sqlalchemy as sqa +import re +import businesstimedelta +import pytz +from datetime import time, datetime as dt +import sys + + +def get_data(login: str, startDate: str, endDate: str, sqlQuery: str) -> pd.DataFrame: + """ + Connects to the OnBase database on LPP-SQL01 and runs the query specified in OnBaseSearchQuery.txt + login : The login information used to authenticate with the MSSQL server. The user needs read permission on the Onbase database + startDate : The earliest dated record to pull. Format = mm/dd/yyyy + endDate : The most recent record to pull. 
+    """
+
+    connStr = f"mssql+pymssql://leafnow.com\\{login}@LPP-SQL01"
+    try:
+        print("Connecting to SQL database...")
+        with sqa.create_engine(connStr).connect() as con:
+            print("Pulling data...")
+            # Fill the date placeholders in the query template with the requested timeframe
+            filledQuery = sqlQuery.replace("REPLACE_START_DATE", startDate).replace("REPLACE_END_DATE", endDate)
+            try:
+                result = con.execute(filledQuery).all()
+                try:
+                    dataframe = pd.DataFrame(result)
+                    assert len(dataframe) > 0, f"No data in dataframe: {dataframe}\nQuery result: {result}"
+                    return dataframe
+                except Exception as e:
+                    print(f"Failed to create a dataframe from SQL result:\n{result}\n{e}")
+                    sys.exit(2)
+            except Exception as e:
+                print(f"Failed to pull data from SQL:\n{filledQuery}\n{e}")
+                sys.exit(2)
+    except Exception as e:
+        print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!\tlogin: {login}")
+        sys.exit(2)
+
+def initial_data_processing(raw_report: pd.DataFrame) -> pd.DataFrame:
+    """
+    Takes in a dataframe of ACH verification entries from the Onbase database.
+    This dataframe is based on the columns returned by the SQL query in OnBaseSearchQuery.txt.
+
+    Returns the dataframe with a number of added columns:
+    - APTurnAround: Total time between a record entering and exiting the ACH Verification queue
+    - AttemptTimeDif: Time between the first and second verification attempts
+    - TimeToFirstAttempt: Time until the first verification attempt is made
+
+    All time measurements are in business hours.
+    """
+    # Define business hours
+    # Currently 7am to 6pm Eastern time, Monday through Friday
+    workday = businesstimedelta.WorkDayRule(
+        start_time= time(7),
+        end_time= time(18),
+        working_days=[0, 1, 2, 3, 4],
+        tz=pytz.timezone("US/Eastern"))
+    businesshrs = businesstimedelta.Rules([workday])
+
+    # Convert columns to datetime
+    date_time_format = "%Y-%m-%d %H:%M:%S.%f"
+    raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p")
+    raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format)
+    raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format)
+    raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format)
+    raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format)
+
+    # Add calculated time columns
+    # Rows without the required timestamps (or with out-of-order timestamps) are filled with None
+    raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2
+                                                  if row.QueueExitTime > dt(1965,1,1)
+                                                  and row.QueueExitTime > row.QueueEntryTime
+                                                  else None, axis = 1)
+    raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2
+                                                  if
+                                                  (pd.notna(row.secondattempt) and pd.notna(row.firstattempt)) and
+                                                  (row.secondattemptdate > row.firstattemptdate)
+                                                  else None, axis = 1)
+    raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2
+                                                  if pd.notna(row.firstattempt) and row.QueueEntryTime > dt(1965,1,1)
+                                                  else None, axis = 1)
+    return raw_report
\ No newline at end of file
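
For reference, a minimal standalone sketch of the business-hours arithmetic that initial_data_processing relies on, reusing the same businesstimedelta rule definition that onBaseData.py sets up above; the two timestamps are made-up example values, not data from the report:

import businesstimedelta
import pytz
from datetime import time, datetime

# Same working-hours rule as onBaseData.py: 7am-6pm US/Eastern, Monday through Friday
workday = businesstimedelta.WorkDayRule(
    start_time=time(7),
    end_time=time(18),
    working_days=[0, 1, 2, 3, 4],
    tz=pytz.timezone("US/Eastern"))
businesshrs = businesstimedelta.Rules([workday])

# Hypothetical queue entry/exit timestamps spanning an overnight gap
entry = datetime(2022, 10, 3, 16, 0)  # Monday 4:00pm
exit_ = datetime(2022, 10, 4, 9, 0)   # Tuesday 9:00am

# difference() counts only working hours, so this should print 4.0
# (2 business hours on Monday + 2 on Tuesday), not the 17 wall-clock hours
print(businesshrs.difference(entry, exit_).timedelta.total_seconds() / 60**2)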