You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
ACHVerification/onBaseData.py

84 lines
4.2 KiB

import pandas as pd
import sqlalchemy as sqa
import re
import businesstimedelta
import pytz
from datetime import time, datetime as dt
import sys
def get_data(login: str, startDate: str, endDate: str, sqlQuery: str) -> pd.DataFrame:
"""
Connects to the OnBase database on LPP-SQL01 and runs the query specified in OnBaseSearchQuery.txt
login : The login information used to authenticate with the MSSQL server. The user needs read permission on the Onbase database
startDate : The earliest dated record to pull. Format = mm/dd/yyyy
endDate : The most recent record to pull. Format = mm/dd/yyyy
"""
connStr = f"mssql+pymssql://leafnow.com\{login}@LPP-SQL01"
try:
print(f"Connectiong to SQL database...")
with sqa.create_engine(connStr).connect() as con:
print("Pulling data...")
with open('OnBaseSearchQuery.txt') as obQueryFile:
filledQuery = sqlQuery.replace("REPLACE_START_DATE", startDate).replace("REPLACE_END_DATE", endDate)
try:
result = con.execute(filledQuery).all()
try:
dataframe = pd.DataFrame(result)
assert len(dataframe) > 0, f"No data in dataframe: {dataframe}\nQuery result: {result}"
return dataframe
except:
print(f"Failed to create a dataframe from SQL result:\n{result}")
sys.exit(2)
except Exception as e:
print(f"Failed to pull data from SQL:\n{filledQuery}\n{e}")
sys.exit(2)
except Exception as e:
print(f"Failed to connect to SQL:\n{e}\nPlease make sure your username and password are correct!\tlogin: {login}")
sys.exit(2)
def inital_data_processing(raw_report: pd.DataFrame) -> pd.DataFrame:
"""
Takes in a dataframe of ACH verification entries from the Onbase database.
This dataframe is based on the returns in the SQL query in OnBaseSearchQuery.txt.
The return adds a number of columns to the data:
- APTurnAround: Total time between a report entering and exiting the ACH Verification queue
- AttemptTimeDif: Time between first and second attempt
- TimeToFirstAttempt: Time intil making the first attempt
All time measurments are in business hours
"""
# Define business hours
# Currently 7am to 6pm eastern time
workday = businesstimedelta.WorkDayRule(
start_time= time(7),
end_time= time(18),
working_days=[0, 1, 2, 3, 4],
tz=pytz.timezone("US/Eastern"))
businesshrs = businesstimedelta.Rules([workday])
# Convert columns to datetime]
date_time_format = "%Y-%m-%d %H:%M:%S.%f"
raw_report["StatusDateTime"] = pd.to_datetime(raw_report["StatusDateTime"], format="%b %d %Y %I:%M%p")
raw_report["firstattemptdate"] = pd.to_datetime(raw_report["firstattemptdate"], format=date_time_format)
raw_report["secondattemptdate"] = pd.to_datetime(raw_report["secondattemptdate"], format=date_time_format)
raw_report["QueueEntryTime"] = pd.to_datetime(raw_report["QueueEntryTime"], format=date_time_format)
raw_report["QueueExitTime"] = pd.to_datetime(raw_report["QueueExitTime"], format=date_time_format)
# Add calculated time columns
# Check to make sure the columns being used are valid otherwise fill with None
raw_report["APTurnAround"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.QueueExitTime).timedelta.total_seconds() / 60**2
if row.QueueExitTime > dt(1965,1,1)
and row.QueueExitTime > row.QueueEntryTime
else None, axis = 1)
raw_report["AttemptTimeDif"] = raw_report.apply(lambda row: businesshrs.difference(row.firstattemptdate, row.secondattemptdate).timedelta.total_seconds() / 60**2
if
(row.secondattempt != None and row.firstattempt != None) and
(row.secondattemptdate > row.firstattemptdate)
else None, axis = 1)
raw_report["TimeToFirstAttempt"] = raw_report.apply(lambda row: businesshrs.difference(row.QueueEntryTime, row.firstattemptdate).timedelta.total_seconds() / 60**2
if row.firstattempt != None and row.QueueEntryTime > dt(1965,1,1)
else None, axis = 1)
return raw_report