Finished v1 | separated core functions, extracted query, added config, using template

master
Griffiths Lott 3 years ago
parent 6c94e493dc
commit 6fef1ebbfb
  1. BIN
      __pycache__/inputManager.cpython-39.pyc
  2. BIN
      __pycache__/inputPutManager.cpython-39.pyc
  3. BIN
      __pycache__/onBaseData.cpython-39.pyc
  4. 5
      config.json
  5. 29
      inputManager.py
  6. 264
      main.py
  7. 84
      onBaseData.py

@ -0,0 +1,5 @@
{
"template": "_TEMPLATE_ACHVerReport.xlsx",
"output_dir": "../",
"sql_query": "OnBaseSearchQuery.txt"
}

@ -0,0 +1,29 @@
from datetime import datetime as dt
import re
def get_login() -> str:
    """Prompt the user for LEAF credentials and return them as "user:password".

    The username is lower-cased before being combined; the password is kept
    exactly as typed.
    """
    print("NOTE: This program requires the user to have read access to: LPP-SQL01.Onbase")
    username = input("What is your LEAF login name? ")
    password = input("What is your LEAF password? ")
    return f"{username.lower()}:{password}"
def get_timeframe(startDate: str = "09/27/2022", endDate: str = None):
    """Show the current report timeframe and optionally let the user edit it.

    startDate : earliest report date, mm/dd/yyyy (default "09/27/2022")
    endDate   : latest report date; defaults to today's date (mm/dd/yy),
                evaluated per call — the original default argument was
                evaluated once at import time and went stale.
    Returns (startDate, endDate) as confirmed by the user.
    """
    if endDate is None:
        endDate = dt.now().strftime("%m/%d/%y")
    print(f"""\nCurrent report timeframe:
Start Date: {startDate}\nEnd Date: {endDate}""")
    edit = input("Would you like to edit this? (y/n): ").lower()
    while edit != 'y' and edit != 'n':
        print(edit)
        # lower() here too: the original left "Y"/"N" stuck in this loop forever
        edit = input("Please enter y or n: ").lower()
    if edit == 'y':
        startDate = input("Start Date (mm/dd/yyyy): ")
        while re.search(r"\d{2}/\d{2}/\d{4}", startDate) is None:
            startDate = input("Please enter a start date with the following format: mm/dd/yyyy\n")
        endDate = input("End Date (mm/dd/yyyy): ")
        while re.search(r"\d{2}/\d{2}/\d{4}", endDate) is None:
            endDate = input("Please enter a end date with the following format: mm/dd/yyyy\n")
        # Return the recursive confirmation's result: the original discarded
        # it, silently losing any edits made past the first round.
        return get_timeframe(startDate, endDate)
    return startDate, endDate

@ -1,252 +1,28 @@
import openpyxl as pxl
from openpyxl import load_workbook
import pandas as pd
from datetime import datetime as dt
from pprint import pprint as prt
import sqlalchemy as sqa
from datetime import timedelta, time,datetime as dt
import numpy as np
import businesstimedelta
import pytz
import os
import json
# Custom modules
import onBaseData
import inputManager
# Load runtime configuration: template workbook, output directory, query file.
with open('config.json') as json_file:
    config = json.load(json_file)
# Business-hours rule used for turnaround math: Mon-Fri 07:00-18:00 US/Eastern.
workday = businesstimedelta.WorkDayRule(
    start_time= time(7),
    end_time= time(18),
    working_days=[0, 1, 2, 3, 4],
    tz=pytz.timezone("US/Eastern"))
# Read the SQL query template named in the config (contains the
# REPLACE_START_DATE / REPLACE_END_DATE placeholders used by onBaseData).
with open(config["sql_query"]) as sqlQFile:
    sqlQuery = sqlQFile.read()
businesshrs = businesstimedelta.Rules([workday])
# Interactive prompts for credentials and reporting window, then pull and
# pre-process the OnBase data through the refactored helper modules.
loginStr = inputManager.get_login()
startDate, endDate = inputManager.get_timeframe(startDate= "09/27/2022", endDate= dt.now().strftime("%m/%d/%y"))
rawData = onBaseData.get_data(login=loginStr, startDate=startDate, endDate=endDate, sqlQuery=sqlQuery)
fullData = onBaseData.inital_data_processing(raw_report= rawData)
def pfd(dataframe):
    """Pretty-print *dataframe* with pandas' row/column truncation disabled."""
    display_options = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*display_options):
        prt(dataframe)
# Name the output workbook after today's date and start it from the template.
newReport = f"ACHVerificationReport {dt.now().strftime('%m-%d-%y')}.xlsx"
# shutil.copy replaces the original os.system("cp ...") call: it is portable
# (no POSIX `cp` required) and avoids the shell-quoting problems of building
# a command line out of file paths.
import shutil
shutil.copy(config['template'], os.path.join(config['output_dir'], newReport))
def login() -> str:
    """Prompt for LEAF credentials; return them as "user:password".

    The username is lower-cased, the password left as typed.
    NOTE(review): duplicates inputManager.get_login (minus the access note) —
    leftover from before the refactor.
    """
    username = input("What is your LEAF login name? ")
    password = input("What is your LEAF password? ")
    return f"{username.lower()}:{password}"
def get_timeframe():
    """Return the fixed reporting window: ("09/27/2022", today as mm/dd/yy)."""
    window_start = "09/27/2022"
    window_end = dt.now().strftime("%m/%d/%y")
    return window_start, window_end
def _ach_queue_select(wf: str, lc_num: str, state_num: str,
                      start_date: str, end_date: str) -> str:
    """Build one UNION branch of the ACH verification report query.

    The two branches (PHL and MOB queues) were identical except for the
    workflow-log alias and its lifecycle/state numbers, so they are now
    generated from one template.  This also fixes the copy-paste bug where
    the second branch had hard-coded dates ('09/27/2022' / '12/06/2022')
    instead of the requested timeframe, and unifies the attempt-date columns
    (the second branch wrapped them in RTRIM, the first did not).
    """
    return f"""select
--id.itemnum,
RTRIM(ki105.keyvaluechar) as ApplicationNum
,RTRIM(ki103.keyvaluechar) as CustName
,RTRIM(ki136.keyvaluechar) as SubmittedBy
,Rtrim(ki354.keyvaluechar) as MarketingRepEmail
,RTRIM(ki1076.keyvaluechar) as Status
,RTRIM(ki1456.keyvaluechar) as APRep
,RTRIM(ki1457.keyvaluetod) as StatusDateTime
,RTRIM(kgd426.kg749) as vendornum
,RTRIM(kgd426.kg750) as vendorname
,RTRIM(kgd426.kg1388) as firstattempt
,kgd426.kg1454 as firstattemptdate
,RTRIM(kgd426.kg1452) as firstattemptcomments
,RTRIM(kgd426.kg1389) as secondattempt
,kgd426.kg1455 as secondattemptdate
,RTRIM(kgd426.kg1453) as secondattemptcomments
,RTRIM(kgd426.kg1075) as leafemployee
,RTRIM(kgd426.kg1074) as vendorverifiedwith
,min({wf}.entrytime) as QueueEntryTime
,max({wf}.exittime) as QueueExitTime
from hsi.itemdata id
--join hsi.doctype dt on dt.itemtypenum = id.itemtypenum
join hsi.keyrecorddata426 kgd426 on kgd426.itemnum = id.itemnum
left outer join hsi.keyitem105 ki105 on ki105.itemnum = id.itemnum
left outer join hsi.keyitem103 ki103 on ki103.itemnum = id.itemnum
left outer join hsi.keyitem136 ki136 on ki136.itemnum = id.itemnum
left outer join hsi.keyitem354 ki354 on ki354.itemnum = id.itemnum
left outer join hsi.keyitem1076 ki1076 on ki1076.itemnum = id.itemnum
left outer join hsi.keyitem1457 ki1457 on ki1457.itemnum = id.itemnum
left outer join hsi.keyitem1456 ki1456 on ki1456.itemnum = id.itemnum
left outer join hsi.keyitem749 ki749 on ki749.itemnum = id.itemnum
left outer join hsi.keyitem750 ki750 on ki750.itemnum = id.itemnum
left outer join hsi.keyitem1388 ki1388 on ki1388.itemnum = id.itemnum
left outer join hsi.keyitem1454 ki1454 on ki1454.itemnum = id.itemnum
left outer join hsi.keyitem1452 ki1452 on ki1452.itemnum = id.itemnum
left outer join hsi.keyitem1389 ki1389 on ki1389.itemnum = id.itemnum
left outer join hsi.keyitem1455 ki1455 on ki1455.itemnum = id.itemnum
left outer join hsi.keyitem1453 ki1453 on ki1453.itemnum = id.itemnum
left outer join hsi.keyitem1075 ki1075 on ki1075.itemnum = id.itemnum
left outer join hsi.keyitem1074 ki1074 on ki1074.itemnum = id.itemnum
join hsi.wflog {wf} on {wf}.itemnum = id.itemnum and {wf}.lcnum = '{lc_num}' and {wf}.statenum = '{state_num}'
where id.itemtypenum = 535
and id.status = 0
and CONVERT(DATE,ki1457.keyvaluetod) BETWEEN '{start_date}' and '{end_date}'
group by
ki105.keyvaluechar
,ki103.keyvaluechar
,ki136.keyvaluechar
,ki354.keyvaluechar
,ki1076.keyvaluechar
,ki1456.keyvaluechar
,ki1457.keyvaluetod
,kgd426.kg749
,kgd426.kg750
,kgd426.kg1388
,kgd426.kg1454
,kgd426.kg1452
,kgd426.kg1389
,kgd426.kg1455
,kgd426.kg1453
,kgd426.kg1075
,kgd426.kg1074"""


def get_data() -> pd.DataFrame:
    """Pull ACH verification queue records from the OnBase database.

    Prompts for credentials (via login()) and uses the window returned by
    get_timeframe().  The report unions the PHL (lifecycle 101 / state 405)
    and MOB (lifecycle 106 / state 417) ACH queues.

    Returns a DataFrame of the query result, or None when any stage fails
    (connection, query execution, or DataFrame construction) — failures are
    printed rather than raised.
    """
    connection_str = f"mssql+pymssql://leafnow.com\{login()}@LPP-SQL01"
    try:
        print("Connecting to SQL database...")
        with sqa.create_engine(connection_str).connect() as con:
            start_date, end_date = get_timeframe()
            print("Pulling data...")
            # NOTE(review): the dates are interpolated directly into the SQL
            # text; fine for the fixed values get_timeframe() returns, but not
            # safe for arbitrary user input.
            # PHL LifeCycle = 101; ACH Queue = 405
            # MOB LifeCycle = 106; ACH Queue = 417
            query = f"""
use Onbase
{_ach_queue_select('wf101', '101', '405', start_date, end_date)}
UNION
{_ach_queue_select('wf106', '106', '417', start_date, end_date)}
Order by 1
"""
            try:
                result = con.execute(query).all()
                try:
                    return pd.DataFrame(result)
                except Exception:
                    print(f"Failed to create a dataframe from SQL result:\n{result}")
            except Exception as e:
                print(f"Failed to pull data from SQL:\n{query}\n{e}")
    except Exception as e:
        print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!")
def stats_for_col(columns: list, df: pd.DataFrame) -> pd.DataFrame:
    """Summary statistics for selected numeric columns of *df*.

    columns : list of column names to summarize (the original annotation said
              ``str``, but the argument is iterated as a sequence of names)
    df      : DataFrame containing those columns
    Returns a DataFrame with one row per column: name, mean, median, max,
    and (pandas default, sample) standard deviation.
    """
    return pd.DataFrame({
        "Data": list(columns),
        "Mean": [df[col].mean() for col in columns],
        "Median": [df[col].median() for col in columns],
        "Max": [df[col].max() for col in columns],
        "Std": [df[col].std() for col in columns],
    })
def process_data(raw_report: pd.DataFrame) -> pd.DataFrame:
    """Parse date columns, add business-hour timing columns, and print
    summary statistics for the ACH verification report.

    raw_report : DataFrame shaped like the get_data() query result.
    Returns the same DataFrame with APTurnAround, AttemptTimeDif and
    TimeToFirstAttempt columns added (hours, per the module-level
    ``businesshrs`` rules).  Mutates raw_report in place and prints stats
    as a side effect.
    NOTE(review): largely duplicates onBaseData.inital_data_processing —
    looks like leftover pre-refactor code.
    """
    # Convert columns to datetime
    # assumes OnBase returns these as strings in the formats below — TODO confirm
    date_time_format = "%Y-%m-%d %H:%M:%S.%f"
    raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p")
    raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format)
    raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format)
    raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format)
    raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format)
    # Business hours between queue entry and exit; the dt(1965,1,1) guard
    # presumably filters placeholder exit times — TODO confirm sentinel value.
    raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2
    if row.QueueExitTime > dt(1965,1,1)
    and row.QueueExitTime > row.QueueEntryTime
    else None, axis = 1)
    # NOTE(review): `!= None` does not detect pandas NaN/NaT missing values —
    # confirm missing attempts are actually represented as None here.
    raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2
    if
    (row.secondattempt != None and row.firstattempt != None) and
    (row.secondattemptdate > row.firstattemptdate)
    else None, axis = 1)
    raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2
    if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1)
    else None, axis = 1)
    # Headline counts for the printed summary.
    unique_deals = len(raw_report["ApplicationNum"].unique())
    ver_on_first = len(raw_report.query("firstattempt == 'VERIFICATION COMPLETED'"))
    verified = len(raw_report.query("Status == 'VERIFICATION COMPLETED'"))
    failed_on_first = len(raw_report.query("firstattempt != 'VERIFICATION COMPLETED' & firstattempt != 'VERIFICATION IN PROCESS'"))
    col_stats = stats_for_col(["APTurnAround", "AttemptTimeDif", "TimeToFirstAttempt"], raw_report)
    prt(col_stats)
    print(f"\n# of deals: {unique_deals} | # payments {len(raw_report)}")
    print(f"Verified on first: {ver_on_first} ({round(ver_on_first/len(raw_report),4) *100}%) | Failed on first: {failed_on_first} ({round(failed_on_first/len(raw_report),4) *100}%)")
    # Per-AP-rep mean/median/max pivot of the three timing columns.
    pt_by_ap_rep = pd.pivot_table(raw_report, index="APRep", values= ["APTurnAround","AttemptTimeDif","TimeToFirstAttempt"],aggfunc = [np.mean, np.median, np.max])
    prt(pt_by_ap_rep)
    return raw_report
# NOTE(review): this tail re-runs the old in-file pipeline (get_data /
# process_data) even though the refactored modules already produced fullData
# above — looks like leftover pre-refactor code; confirm before keeping.
ach_raw_report = get_data()
prt(ach_raw_report)
#pfd(ach_raw_report)
report_plus = process_data(ach_raw_report)
prt(report_plus)
# "test3.xlsx" appears to be a debugging artifact — TODO confirm it is wanted.
report_plus.to_excel("test3.xlsx",index=False)
# Append the processed data as a sheet in the templated report copied earlier.
with pd.ExcelWriter(config['output_dir']+newReport, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    fullData.to_excel(writer, sheet_name="raw_data")

@ -0,0 +1,84 @@
import pandas as pd
import sqlalchemy as sqa
import re
import businesstimedelta
import pytz
from datetime import time, datetime as dt
import sys
def get_data(login: str, startDate: str, endDate: str, sqlQuery: str) -> pd.DataFrame:
    """
    Connects to the OnBase database on LPP-SQL01 and runs the supplied query.

    login : "user:password" used to authenticate with the MSSQL server.
            The user needs read permission on the Onbase database.
    startDate : The earliest dated record to pull. Format = mm/dd/yyyy
    endDate : The most recent record to pull. Format = mm/dd/yyyy
    sqlQuery : Query text containing REPLACE_START_DATE / REPLACE_END_DATE
               placeholders for the timeframe.
    Returns a non-empty DataFrame of the query result; prints a diagnostic
    and exits with status 2 on any failure (connect, query, empty result).
    """
    connStr = f"mssql+pymssql://leafnow.com\{login}@LPP-SQL01"
    try:
        print("Connecting to SQL database...")
        with sqa.create_engine(connStr).connect() as con:
            print("Pulling data...")
            # Substitute the timeframe placeholders in the caller-supplied
            # query.  (The original also re-opened OnBaseSearchQuery.txt here
            # but never read it — a dead file handle whose hard-coded name
            # ignored the configured query path; removed.)
            filledQuery = sqlQuery.replace("REPLACE_START_DATE", startDate).replace("REPLACE_END_DATE", endDate)
            try:
                result = con.execute(filledQuery).all()
                try:
                    dataframe = pd.DataFrame(result)
                except Exception:
                    print(f"Failed to create a dataframe from SQL result:\n{result}")
                    sys.exit(2)
                # Treat an empty report as a failure with its own message:
                # the original expressed this as an assert whose message was
                # swallowed by a bare except.
                if len(dataframe) == 0:
                    print(f"No data in dataframe: {dataframe}\nQuery result: {result}")
                    sys.exit(2)
                return dataframe
            except Exception as e:
                print(f"Failed to pull data from SQL:\n{filledQuery}\n{e}")
                sys.exit(2)
    except Exception as e:
        print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!\tlogin: {login}")
        sys.exit(2)
def inital_data_processing(raw_report: pd.DataFrame) -> pd.DataFrame:
    """
    Takes in a dataframe of ACH verification entries from the Onbase database.
    This dataframe is based on the returns in the SQL query in OnBaseSearchQuery.txt.
    The return adds a number of columns to the data:
    - APTurnAround: Total time between a report entering and exiting the ACH Verification queue
    - AttemptTimeDif: Time between first and second attempt
    - TimeToFirstAttempt: Time until making the first attempt
    All time measurements are in business hours.
    Mutates raw_report in place and returns it.
    (NOTE(review): the "inital" spelling is kept — callers use this name.)
    """
    # Define business hours
    # Currently 7am to 6pm eastern time
    workday = businesstimedelta.WorkDayRule(
        start_time= time(7),
        end_time= time(18),
        working_days=[0, 1, 2, 3, 4],
        tz=pytz.timezone("US/Eastern"))
    businesshrs = businesstimedelta.Rules([workday])
    # Convert columns to datetime
    # assumes OnBase returns these as strings in the formats below — TODO confirm
    date_time_format = "%Y-%m-%d %H:%M:%S.%f"
    raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p")
    raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format)
    raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format)
    raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format)
    raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format)
    # Add calculated time columns
    # Check to make sure the columns being used are valid otherwise fill with None
    # The dt(1965,1,1) guard presumably filters placeholder timestamps — TODO
    # confirm the sentinel value OnBase uses for "never exited".
    raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2
    if row.QueueExitTime > dt(1965,1,1)
    and row.QueueExitTime > row.QueueEntryTime
    else None, axis = 1)
    # NOTE(review): `!= None` does not detect pandas NaN/NaT missing values —
    # confirm missing attempts are actually represented as None here.
    raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2
    if
    (row.secondattempt != None and row.firstattempt != None) and
    (row.secondattemptdate > row.firstattemptdate)
    else None, axis = 1)
    raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2
    if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1)
    else None, axis = 1)
    return raw_report
Loading…
Cancel
Save