You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
84 lines
4.2 KiB
84 lines
4.2 KiB
import pandas as pd
|
|
import sqlalchemy as sqa
|
|
import re
|
|
import businesstimedelta
|
|
import pytz
|
|
from datetime import time, datetime as dt
|
|
import sys
|
|
|
|
|
|
def get_data(login: str, startDate: str, endDate: str, sqlQuery: str) -> pd.DataFrame:
|
|
"""
|
|
Connects to the OnBase database on LPP-SQL01 and runs the query specified in OnBaseSearchQuery.txt
|
|
login : The login information used to authenticate with the MSSQL server. The user needs read permission on the Onbase database
|
|
startDate : The earliest dated record to pull. Format = mm/dd/yyyy
|
|
endDate : The most recent record to pull. Format = mm/dd/yyyy
|
|
"""
|
|
|
|
connStr = f"mssql+pymssql://leafnow.com\{login}@LPP-SQL01"
|
|
try:
|
|
print(f"Connectiong to SQL database...")
|
|
with sqa.create_engine(connStr).connect() as con:
|
|
print("Pulling data...")
|
|
with open('OnBaseSearchQuery.txt') as obQueryFile:
|
|
filledQuery = sqlQuery.replace("REPLACE_START_DATE", startDate).replace("REPLACE_END_DATE", endDate)
|
|
try:
|
|
result = con.execute(filledQuery).all()
|
|
try:
|
|
dataframe = pd.DataFrame(result)
|
|
assert len(dataframe) > 0, f"No data in dataframe: {dataframe}\nQuery result: {result}"
|
|
return dataframe
|
|
except:
|
|
print(f"Failed to create a dataframe from SQL result:\n{result}")
|
|
sys.exit(2)
|
|
except Exception as e:
|
|
print(f"Failed to pull data from SQL:\n{filledQuery}\n{e}")
|
|
sys.exit(2)
|
|
except Exception as e:
|
|
print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!\tlogin: {login}")
|
|
sys.exit(2)
|
|
|
|
def inital_data_processing(raw_report: pd.DataFrame) -> pd.DataFrame:
|
|
"""
|
|
Takes in a dataframe of ACH verification entries from the Onbase database.
|
|
This dataframe is based on the returns in the SQL query in OnBaseSearchQuery.txt.
|
|
|
|
The return adds a number of columns to the data:
|
|
- APTurnAround: Total time between a report entering and exiting the ACH Verification queue
|
|
- AttemptTimeDif: Time between first and second attempt
|
|
- TimeToFirstAttempt: Time intil making the first attempt
|
|
|
|
All time measurments are in business hours
|
|
"""
|
|
# Define business hours
|
|
# Currently 7am to 6pm eastern time
|
|
workday = businesstimedelta.WorkDayRule(
|
|
start_time= time(7),
|
|
end_time= time(18),
|
|
working_days=[0, 1, 2, 3, 4],
|
|
tz=pytz.timezone("US/Eastern"))
|
|
businesshrs = businesstimedelta.Rules([workday])
|
|
|
|
# Convert columns to datetime]
|
|
date_time_format = "%Y-%m-%d %H:%M:%S.%f"
|
|
raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p")
|
|
raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format)
|
|
raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format)
|
|
raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format)
|
|
raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format)
|
|
|
|
# Add calculated time columns
|
|
# Check to make sure the columns being used are valid otherwise fill with None
|
|
raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2
|
|
if row.QueueExitTime > dt(1965,1,1)
|
|
and row.QueueExitTime > row.QueueEntryTime
|
|
else None, axis = 1)
|
|
raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2
|
|
if
|
|
(row.secondattempt != None and row.firstattempt != None) and
|
|
(row.secondattemptdate > row.firstattemptdate)
|
|
else None, axis = 1)
|
|
raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2
|
|
if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1)
|
|
else None, axis = 1)
|
|
return raw_report |