From 6fef1ebbfb38d025266aa04d3bcf750614aef317 Mon Sep 17 00:00:00 2001 From: Griffiths Lott Date: Tue, 13 Dec 2022 16:37:35 -0500 Subject: [PATCH] Finished v1 | seperated core functions, extracted query, added config, using template --- __pycache__/inputManager.cpython-39.pyc | Bin 0 -> 1390 bytes __pycache__/inputPutManager.cpython-39.pyc | Bin 0 -> 1447 bytes __pycache__/onBaseData.cpython-39.pyc | Bin 0 -> 3935 bytes config.json | 5 + inputManager.py | 29 +++ main.py | 264 ++------------------- onBaseData.py | 84 +++++++ 7 files changed, 138 insertions(+), 244 deletions(-) create mode 100644 __pycache__/inputManager.cpython-39.pyc create mode 100644 __pycache__/inputPutManager.cpython-39.pyc create mode 100644 __pycache__/onBaseData.cpython-39.pyc create mode 100644 config.json create mode 100644 inputManager.py create mode 100644 onBaseData.py diff --git a/__pycache__/inputManager.cpython-39.pyc b/__pycache__/inputManager.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75fe8d52bcb774922c9a66502b487d0ce70df672 GIT binary patch literal 1390 zcmb7DL2n#26t+FHyOT*$wM7UaA@HG5yN71OsuE&_nwCupNNEz9LZ#J6$gI7)qs-VF z+e6r4b4t&gE44_QBY$bG91sV@y%%_IlC-6qcq6}Ozh}Sq?C%?~x*8LZKYxEV{3|5n zH(b0a;3M~7xCfvlku)PY8BmIImazd-fec87EP;sVf7GWmlD7PM<0e;2j!uIQ>gB#WT*55 zZ2*?e$%37c=R}5c`ked{oUl1oYLp+v9Aw9g9Um)y)A7uVQ_WL7GIgYUxV!gwyT>0- zQpcy(jIGGIRYyf?mE(S*c;S@g-tdX|K|w@FE`~$pAZU7gXMg|p!Ph$*AFuD}?}bz4 zhfgNLbI3I_h2=Zj_a9tT)FM}#y!_z5@u_g`*jTyAU(#MXs2Wq7YF{-{JuSdw=2%&9 zvqBHp^tg)Z8ytB1E3t^(%OCr_`%&H%jjJzvSO4n~u_<@Tt+sBQ-VFr+S?9jZ{h9(LN6596 z-STGKFRX=k1)ku;apd+07^Qmg0p6G6J$t-dj<>bET-{cxZufXaXBE}uyZe~}nsKNBtekOUxrTi2zs}BY<#|WS&I|<3mDdvaQ}?9vO5Zst z@BJ%@;58H+|3!YB`bo|Dk;#B~fIJ@=n+xCLuhYimdzbCMUG45$sR7#mlepcq2)b>e zYJsYlRaMg|VTY4y73ucsN!x*4#cfw`TZk`K{|@JB86)q2!l=6eiqMdl5wBA`vSly}gE$zLMxsL^S13?aDB=XN`v4SMjok(i1Qe`DNel0i zGM8&ZTlwVtfF6q`0i@pgdwSbrd+J}vrJY&FNt&KYfx{s=JDlNslxQ|X0_~6Ae;&Ud z5b`HZUTr>1?m|x=fRIGem?U&aDe5d{2^)H)?#2GlSAq1OP#MU|BY#+tRpqU!YqF}z z+L}(73Shtfgvn5DJPL+&=^c<(<1g?_TC}K+O>C@^pw^^t%0-FVgGt3ImsveVv&-gz zg1rYl?SOE^QAy9?M~P#SozpWMJs9bdth_aOOr*c0kI7T-j4c@)9ONH{99rK@Q$AJh zM#^I|jWmz+#FRVv{k{Fe-7Y_zMJb^h-uvSDiCQFTo9FNScYiL@^we0n z&7aY3%P)etjkL22QZ9l>&ofs9u{l+?2*inLZs>F z%f4}Lw6E2UjUeJ%7eVw)k|;ZlM9h0)9yt+@cFZ_Sly>Q8i+`>80FsBaM)!JO@$Zz4 zCec{9$mr1nH8<8pdO9kD8D#Fh&|<3W?fIgp!>xW_1_NM`wB*ch&>H-%QQL&IJly)M z-TAcL+3Ixi+Gf(;leM#0#k|Sm{ ztcSRMdauMNtD$Fsq76s)QZSiY9XR z;$~Z(y@FqFtxSpbVIgjSytCSmJX@w#O{I+1`JwvvguHy za!19HAn#%6u@2P0heZEIYQQ8h2sXHFOclZ7M{Tvm{ z&bkJ!AN~8Ut(VRl#=q!g`YPh(4K&q7#|_RR!)N+w`j&p$zOA1Hzo4H*zlf(9l{${^ zbV`1i<@;RU6_wG=4rjRaNrBtE@Sg3@3cOR}#ZOFL;tpt4&bEwt`3zExjk=lHY1k2& z!-G`Ns9wklQL-P#Sy`olOjkkAN+RZZ;Hdl2t)7tmtSCj=lX1%?KBKFGC$$wc^*08y z*wFYGDt~Gn+aI#Qo2kLAM~oRMJ28j$DLb)FjStxe7mf>OGES2JTBcaxMLf(@yKFET)Sc@zGN_c-$g$d3=1d4VF`Ror^c}ZdCp0B=$x|S za_SDtj~FlCv(-O-YjC_lugcK**!UE3|GeD zj=M%|E@4-^Y8&b7hphh^Gmc$o@(OlThb723H*|9uuzIrZ4t8?q*!{g5yZLj;AU;#p z&0T?K)atZ-Q*u653P`+>^Z+i?Q-PrGlrrq7}A7!M!jZZQc@rY#r!KX`~NR zEkDRLY6Ly7kJGS)?DLMo^gxSydkZ>vO2`L7HZ%vU zmtHIcSDq9B_qsxMLZyfmez`H)hOsePlSyJvG=aKNC{#*DLjx&ViPVa8dr{Qz*0qA( zue?rYk@LkqF3o6E7E>m55-`DbDF1(R8C>cp)YYrqzJ4|r7b6k0}?tK$nZRWC|GXsicel*g}yXdrUI(+yr* zCLOKsXuW%868FnbH<7P%N#nImH)HPLyHA(2@>Cs}-gF1c?k1dtB<%$Egr|B^c>Sa& zDUoChJ$1fz1Er2ApaG^4S;Qy(epzQ<)2mmrLRW@ynpx_8kj>_*?}`{02;>_{%6-j_8 z3)ImGAWP_cEoCc}nFVovCE3%SU57XQN;{0;R81hzC0}RdNgOgJ9vku{F!}7kN^|F3 zn5}kxb2CZPop)lfD#HiDTkvkKEw7`@I~^fgVGwyMK{rf;XlFHP^*TsZwX@{C6?>a9 z*%wOfEU(<~ekUYq8ubUQcW#SrB2(nm&Pu|?PND&V>}_=WnX@m_T~dh7JM|hm!44Ot+#6pO2IMGus{f!% zjYF22oIRo*puCTRhb zHC3v3A<*Pw49$;vEst0b=-k)H|G)4)=Q?67fDrVl-~}8o9e8oCvj^0Mwyh-Ub?7L` z&mGF4rrtz=y5KEuZe!cBOnNcLfsr6%J^DoP_C$Il1R0+v3|R@I0ef^hQOVJmRQ+lu zliocsEh9}ur<-nr?P}PDlxhC$Py)?~r^11NN06_c+E44ZlXY6SoKKE;08qk+P-(2n zh}npPX0Z&s(*|__VLhoUQDYSRy4O>{AEmqlz?ss^&!_~>=Frs(8k*0jMr4_&R2gRk zEd%%6t;MxK!69+o&Rl)g?UIzt!EvQW{tVEN6hqkpq|9_MxG;ZrVZO64&%OB@&H1;R z^IMJiwqM;!gS4km>4FR$)aLg*T5{K$e+#STyLFZo^V2)4X#+@|TcLAq+f|G-`cE?9 z>L~j7yK!hw?3zShHn6>3lE1)12U{-DNGq?+_;w3!VZ$h#&X*=addc+XNbP5fzh2JH zj}v2(Lw@BB%5631-|j_1(pOJrcl+reb7;vRiG^RFDJ3b6StU&J~NY*Jy#R<=wpea))cz>pZ;p&8b!g7e(GMsy2>dx}DF9g&E6c7st0%ZgoqVKCCu#W!mZwGKq5FJ~~HnNmm!l>8-igh<-h+}Rgt sg{4v{FVF;?`kHI5xK5Wz&Y+4q35Q3n69}jpItQN;`CVg<^}@^l1vR`i>;M1& literal 0 HcmV?d00001 diff --git a/config.json b/config.json new file mode 100644 index 0000000..362e974 --- /dev/null +++ b/config.json @@ -0,0 +1,5 @@ +{ + "template": "_TEMPLATE_ACHVerReport.xlsx", + "output_dir": "../", + "sql_query": "OnBaseSearchQuery.txt" +} \ No newline at end of file diff --git a/inputManager.py b/inputManager.py new file mode 100644 index 0000000..82af968 --- /dev/null +++ b/inputManager.py @@ -0,0 +1,29 @@ +from datetime import datetime as dt +import re + +def get_login() -> str: + """ + Logs get's login info + """ + print("NOTE: This program requires the user to have read access to: LPP-SQL01.Onbase") + un = input("What is your LEAF login name? ") + pw = input("What is your LEAF password? ") + loginStr = f"{un.lower()}:{pw}" + return loginStr + +def get_timeframe(startDate: str = "09/27/2022", endDate: str = dt.now().strftime("%m/%d/%y")): + print(f"""\nCurrent report timeframe: +Start Date: {startDate}\nEnd Date: {endDate}""") + edit = input("Would you like to edit this? (y/n): ").lower() + while (edit != 'y' and edit != 'n'): + print(edit) + edit = input("Please enter y or n: ") + if edit == 'y': + startDate = input("Start Date (mm/dd/yyyy): ") + while re.search("\d{2}/\d{2}/\d{4}", startDate) == None: + startDate = input("Please enter a start date with the following format: mm/dd/yyyy\n") + endDate = input("End Date (mm/dd/yyyy): ") + while re.search("\d{2}/\d{2}/\d{4}", endDate) == None: + endDate = input("Please enter a end date with the following format: mm/dd/yyyy\n") + get_timeframe(startDate, endDate) + return startDate, endDate \ No newline at end of file diff --git a/main.py b/main.py index 13a1f44..c975e55 100644 --- a/main.py +++ b/main.py @@ -1,252 +1,28 @@ +import openpyxl as pxl +from openpyxl import load_workbook import pandas as pd +from datetime import datetime as dt from pprint import pprint as prt -import sqlalchemy as sqa -from datetime import timedelta, time,datetime as dt -import numpy as np -import businesstimedelta -import pytz +import os +import json +# Custom modules +import onBaseData +import inputManager +with open('config.json') as json_file: + config = json.load(json_file) -workday = businesstimedelta.WorkDayRule( - start_time= time(7), - end_time= time(18), - working_days=[0, 1, 2, 3, 4], - tz=pytz.timezone("US/Eastern")) +with open(config["sql_query"]) as sqlQFile: + sqlQuery = sqlQFile.read() -businesshrs = businesstimedelta.Rules([workday]) +loginStr = inputManager.get_login() +startDate, endDate = inputManager.get_timeframe(startDate= "09/27/2022", endDate= dt.now().strftime("%m/%d/%y")) +rawData = onBaseData.get_data(login=loginStr, startDate=startDate, endDate=endDate, sqlQuery=sqlQuery) +fullData = onBaseData.inital_data_processing(raw_report= rawData) -def pfd(dataframe): - with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also - prt(dataframe) +newReport = f"ACHVerificationReport {dt.now().strftime('%m-%d-%y')}.xlsx" +os.system(f"cp {config['template']} {config['output_dir']}'{newReport}'") -def login() -> str: - """ - Logs get's login info - """ - un = input("What is your LEAF login name? ") - pw = input("What is your LEAF password? ") - login_str = f"{un.lower()}:{pw}" - return login_str - -def get_timeframe(): - start_date = "09/27/2022" - end_date = dt.now().strftime("%m/%d/%y") - return start_date, end_date - -def get_data() -> pd.DataFrame: - connection_str = f"mssql+pymssql://leafnow.com\{login()}@LPP-SQL01" - try: - print(f"Connectiong to SQL database...") - with sqa.create_engine(connection_str).connect() as con: - start_date, end_date = get_timeframe() - print("Pulling data...") - query = f""" - use Onbase -select ---id.itemnum, -RTRIM(ki105.keyvaluechar) as ApplicationNum -,RTRIM(ki103.keyvaluechar) as CustName -,RTRIM(ki136.keyvaluechar) as SubmittedBy -,Rtrim(ki354.keyvaluechar) as MarketingRepEmail -,RTRIM(ki1076.keyvaluechar) as Status -,RTRIM(ki1456.keyvaluechar) as APRep -,RTRIM(ki1457.keyvaluetod) as StatusDateTime -,RTRIM(kgd426.kg749) as vendornum -,RTRIM(kgd426.kg750) as vendorname -,RTRIM(kgd426.kg1388) as firstattempt -,kgd426.kg1454 as firstattemptdate -,RTRIM(kgd426.kg1452) as firstattemptcomments -,RTRIM(kgd426.kg1389) as secondattempt -,kgd426.kg1455 as secondattemptdate -,RTRIM(kgd426.kg1453) as secondattemptcomments -,RTRIM(kgd426.kg1075) as leafemployee -,RTRIM(kgd426.kg1074) as vendorverifiedwith -,min(wf101.entrytime) as QueueEntryTime -,max(wf101.exittime) as QueueExitTime - - -from hsi.itemdata id ---join hsi.doctype dt on dt.itemtypenum = id.itemtypenum -join hsi.keyrecorddata426 kgd426 on kgd426.itemnum = id.itemnum -left outer join hsi.keyitem105 ki105 on ki105.itemnum = id.itemnum -left outer join hsi.keyitem103 ki103 on ki103.itemnum = id.itemnum -left outer join hsi.keyitem136 ki136 on ki136.itemnum = id.itemnum -left outer join hsi.keyitem354 ki354 on ki354.itemnum = id.itemnum -left outer join hsi.keyitem1076 ki1076 on ki1076.itemnum = id.itemnum -left outer join hsi.keyitem1457 ki1457 on ki1457.itemnum = id.itemnum -left outer join hsi.keyitem1456 ki1456 on ki1456.itemnum = id.itemnum -left outer join hsi.keyitem749 ki749 on ki749.itemnum = id.itemnum -left outer join hsi.keyitem750 ki750 on ki750.itemnum = id.itemnum -left outer join hsi.keyitem1388 ki1388 on ki1388.itemnum = id.itemnum -left outer join hsi.keyitem1454 ki1454 on ki1454.itemnum = id.itemnum -left outer join hsi.keyitem1452 ki1452 on ki1452.itemnum = id.itemnum -left outer join hsi.keyitem1389 ki1389 on ki1389.itemnum = id.itemnum -left outer join hsi.keyitem1455 ki1455 on ki1455.itemnum = id.itemnum -left outer join hsi.keyitem1453 ki1453 on ki1453.itemnum = id.itemnum -left outer join hsi.keyitem1075 ki1075 on ki1075.itemnum = id.itemnum -left outer join hsi.keyitem1074 ki1074 on ki1074.itemnum = id.itemnum ---PHL LifeCycle = 101; ACH Queue = 405 -join hsi.wflog wf101 on wf101.itemnum = id.itemnum and wf101.lcnum = '101' and wf101.statenum = '405' - -where id.itemtypenum = 535 -and id.status = 0 -and CONVERT(DATE,ki1457.keyvaluetod) BETWEEN '{start_date}' and '{end_date}' - -group by -ki105.keyvaluechar -,ki103.keyvaluechar -,ki136.keyvaluechar -,ki354.keyvaluechar -,ki1076.keyvaluechar -,ki1456.keyvaluechar -,ki1457.keyvaluetod -,kgd426.kg749 -,kgd426.kg750 -,kgd426.kg1388 -,kgd426.kg1454 -,kgd426.kg1452 -,kgd426.kg1389 -,kgd426.kg1455 -,kgd426.kg1453 -,kgd426.kg1075 -,kgd426.kg1074 - -UNION - -select ---id.itemnum, -RTRIM(ki105.keyvaluechar) as ApplicationNum -,RTRIM(ki103.keyvaluechar) as CustName -,RTRIM(ki136.keyvaluechar) as SubmittedBy -,Rtrim(ki354.keyvaluechar) as MarketingRepEmail -,RTRIM(ki1076.keyvaluechar) as Status -,RTRIM(ki1456.keyvaluechar) as APRep -,RTRIM(ki1457.keyvaluetod) as StatusDateTime -,RTRIM(kgd426.kg749) as vendornum -,RTRIM(kgd426.kg750) as vendorname -,RTRIM(kgd426.kg1388) as firstattempt -,RTRIM(kgd426.kg1454) as firstattemptdate -,RTRIM(kgd426.kg1452) as firstattemptcomments -,RTRIM(kgd426.kg1389) as secondattempt -,RTRIM(kgd426.kg1455) as secondattemptdate -,RTRIM(kgd426.kg1453) as secondattemptcomments -,RTRIM(kgd426.kg1075) as leafemployee -,RTRIM(kgd426.kg1074) as vendorverifiedwith -,min(wf106.entrytime) as QueueEntryTime -,max(wf106.exittime) as QueueExitTime - - -from hsi.itemdata id ---join hsi.doctype dt on dt.itemtypenum = id.itemtypenum -join hsi.keyrecorddata426 kgd426 on kgd426.itemnum = id.itemnum -left outer join hsi.keyitem105 ki105 on ki105.itemnum = id.itemnum -left outer join hsi.keyitem103 ki103 on ki103.itemnum = id.itemnum -left outer join hsi.keyitem136 ki136 on ki136.itemnum = id.itemnum -left outer join hsi.keyitem354 ki354 on ki354.itemnum = id.itemnum -left outer join hsi.keyitem1076 ki1076 on ki1076.itemnum = id.itemnum -left outer join hsi.keyitem1457 ki1457 on ki1457.itemnum = id.itemnum -left outer join hsi.keyitem1456 ki1456 on ki1456.itemnum = id.itemnum -left outer join hsi.keyitem749 ki749 on ki749.itemnum = id.itemnum -left outer join hsi.keyitem750 ki750 on ki750.itemnum = id.itemnum -left outer join hsi.keyitem1388 ki1388 on ki1388.itemnum = id.itemnum -left outer join hsi.keyitem1454 ki1454 on ki1454.itemnum = id.itemnum -left outer join hsi.keyitem1452 ki1452 on ki1452.itemnum = id.itemnum -left outer join hsi.keyitem1389 ki1389 on ki1389.itemnum = id.itemnum -left outer join hsi.keyitem1455 ki1455 on ki1455.itemnum = id.itemnum -left outer join hsi.keyitem1453 ki1453 on ki1453.itemnum = id.itemnum -left outer join hsi.keyitem1075 ki1075 on ki1075.itemnum = id.itemnum -left outer join hsi.keyitem1074 ki1074 on ki1074.itemnum = id.itemnum ---MOB LifeCycle = 106; ACH Queue = 417 -join hsi.wflog wf106 on wf106.itemnum = id.itemnum and wf106.lcnum = '106' and wf106.statenum = '417' - -where id.itemtypenum = 535 -and id.status = 0 -and CONVERT(DATE,ki1457.keyvaluetod) BETWEEN '09/27/2022' and '12/06/2022' - -group by -ki105.keyvaluechar -,ki103.keyvaluechar -,ki136.keyvaluechar -,ki354.keyvaluechar -,ki1076.keyvaluechar -,ki1456.keyvaluechar -,ki1457.keyvaluetod -,kgd426.kg749 -,kgd426.kg750 -,kgd426.kg1388 -,kgd426.kg1454 -,kgd426.kg1452 -,kgd426.kg1389 -,kgd426.kg1455 -,kgd426.kg1453 -,kgd426.kg1075 -,kgd426.kg1074 -Order by 1 - """ - try: - result = con.execute(query).all() - try: - dataframe = pd.DataFrame(result) - return dataframe - except: - print(f"Failed to create a dataframe from SQL result:\n{result}") - except Exception as e: - print(f"Failed to pull data from SQL:\n{query}\n{e}") - except Exception as e: - print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!") - -def stats_for_col(columns: str, df: pd.DataFrame) -> pd.DataFrame: - return pd.DataFrame({ - "Data" : [col for col in columns], - "Mean": [df[col].mean() for col in columns], - "Median": [df[col].median() for col in columns], - "Max": [df[col].max() for col in columns], - "Std": [df[col].std() for col in columns] - }) - -def process_data(raw_report: pd.DataFrame) -> pd.DataFrame: - - # Convert columns to datetime] - date_time_format = "%Y-%m-%d %H:%M:%S.%f" - raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p") - raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format) - raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format) - raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format) - raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format) - raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2 - if row.QueueExitTime > dt(1965,1,1) - and row.QueueExitTime > row.QueueEntryTime - else None, axis = 1) - raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2 - if - (row.secondattempt != None and row.firstattempt != None) and - (row.secondattemptdate > row.firstattemptdate) - else None, axis = 1) - raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2 - if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1) - else None, axis = 1) - - unique_deals = len(raw_report["ApplicationNum"].unique()) - ver_on_first = len(raw_report.query("firstattempt == 'VERIFICATION COMPLETED'")) - verified = len(raw_report.query("Status == 'VERIFICATION COMPLETED'")) - - failed_on_first = len(raw_report.query("firstattempt != 'VERIFICATION COMPLETED' & firstattempt != 'VERIFICATION IN PROCESS'")) - - col_stats = stats_for_col(["APTurnAround", "AttemptTimeDif", "TimeToFirstAttempt"], raw_report) - prt(col_stats) - print(f"\n# of deals: {unique_deals} | # payments {len(raw_report)}") - print(f"Verified on first: {ver_on_first} ({round(ver_on_first/len(raw_report),4) *100}%) | Failed on first: {failed_on_first} ({round(failed_on_first/len(raw_report),4) *100}%)") - - pt_by_ap_rep = pd.pivot_table(raw_report, index="APRep", values= ["APTurnAround","AttemptTimeDif","TimeToFirstAttempt"],aggfunc = [np.mean, np.median, np.max]) - prt(pt_by_ap_rep) - - return raw_report - - -ach_raw_report = get_data() -prt(ach_raw_report) -#pfd(ach_raw_report) -report_plus = process_data(ach_raw_report) -prt(report_plus) -report_plus.to_excel("test3.xlsx",index=False) +with pd.ExcelWriter(config['output_dir']+newReport, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer: + fullData.to_excel(writer, sheet_name="raw_data") \ No newline at end of file diff --git a/onBaseData.py b/onBaseData.py new file mode 100644 index 0000000..e28d988 --- /dev/null +++ b/onBaseData.py @@ -0,0 +1,84 @@ +import pandas as pd +import sqlalchemy as sqa +import re +import businesstimedelta +import pytz +from datetime import time, datetime as dt +import sys + + +def get_data(login: str, startDate: str, endDate: str, sqlQuery: str) -> pd.DataFrame: + """ + Connects to the OnBase database on LPP-SQL01 and runs the query specified in OnBaseSearchQuery.txt + login : The login information used to authenticate with the MSSQL server. The user needs read permission on the Onbase database + startDate : The earliest dated record to pull. Format = mm/dd/yyyy + endDate : The most recent record to pull. Format = mm/dd/yyyy + """ + + connStr = f"mssql+pymssql://leafnow.com\{login}@LPP-SQL01" + try: + print(f"Connectiong to SQL database...") + with sqa.create_engine(connStr).connect() as con: + print("Pulling data...") + with open('OnBaseSearchQuery.txt') as obQueryFile: + filledQuery = sqlQuery.replace("REPLACE_START_DATE", startDate).replace("REPLACE_END_DATE", endDate) + try: + result = con.execute(filledQuery).all() + try: + dataframe = pd.DataFrame(result) + assert len(dataframe) > 0, f"No data in dataframe: {dataframe}\nQuery result: {result}" + return dataframe + except: + print(f"Failed to create a dataframe from SQL result:\n{result}") + sys.exit(2) + except Exception as e: + print(f"Failed to pull data from SQL:\n{filledQuery}\n{e}") + sys.exit(2) + except Exception as e: + print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!\tlogin: {login}") + sys.exit(2) + +def inital_data_processing(raw_report: pd.DataFrame) -> pd.DataFrame: + """ + Takes in a dataframe of ACH verification entries from the Onbase database. + This dataframe is based on the returns in the SQL query in OnBaseSearchQuery.txt. + + The return adds a number of columns to the data: + - APTurnAround: Total time between a report entering and exiting the ACH Verification queue + - AttemptTimeDif: Time between first and second attempt + - TimeToFirstAttempt: Time intil making the first attempt + + All time measurments are in business hours + """ + # Define business hours + # Currently 7am to 6pm eastern time + workday = businesstimedelta.WorkDayRule( + start_time= time(7), + end_time= time(18), + working_days=[0, 1, 2, 3, 4], + tz=pytz.timezone("US/Eastern")) + businesshrs = businesstimedelta.Rules([workday]) + + # Convert columns to datetime] + date_time_format = "%Y-%m-%d %H:%M:%S.%f" + raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p") + raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format) + raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format) + raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format) + raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format) + + # Add calculated time columns + # Check to make sure the columns being used are valid otherwise fill with None + raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2 + if row.QueueExitTime > dt(1965,1,1) + and row.QueueExitTime > row.QueueEntryTime + else None, axis = 1) + raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2 + if + (row.secondattempt != None and row.firstattempt != None) and + (row.secondattemptdate > row.firstattemptdate) + else None, axis = 1) + raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2 + if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1) + else None, axis = 1) + return raw_report \ No newline at end of file