A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/ach_fix.py

110 lines
5.3 KiB

import os
import pandas as pd
from datetime import datetime as dt, timedelta
import sys, getopt
import re
from pathlib import Path
import time
import numpy as np
# Pattern for a contract number: 3 digits, dash, 7 digits, dash, 3 digits.
# Raw string so that "\d" reaches the regex engine without relying on
# Python passing an unrecognised escape sequence through unchanged.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor.

    Breakage list defines the split points that will be used for the line.

    Example:
        Given breakage_list [10, 20, 30],
        using slot_num 0 in the resulting extract_line_slot will yield
        characters 0 - 10 from the string.
        Slot 1 would give characters 10 - 20, and the final slot (3) gives
        characters 30 through the end of the string.

    Args:
        breakage_list: character offsets at which a line is split.

    Returns:
        The `extract_line_slot` function bound to `breakage_list`.
    """

    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'.

        Args:
            slot_num: index of the slot to read; valid values run up to
                len(breakage_list), the last slot reaching the end of the line.
            line_string: the line to slice.
            debug: when True, print the slot bounds and the extracted value.

        Returns:
            The slot text with surrounding whitespace and all commas removed.
            It is returned as a float when float() accepts it, otherwise as
            the cleaned string (commas are removed in that case as well).

        Raises:
            AssertionError: if slot_num is greater than len(breakage_list).
        """
        slot_count = len(breakage_list) + 1
        # We can't have a slot number higher than the number of slots
        assert slot_num < slot_count, (
            f"slot_num {slot_num} is out of range for {slot_count} slots"
        )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = (
            len(line_string)
            if slot_num == len(breakage_list)
            else breakage_list[slot_num]
        )
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            # NOTE: float() also accepts text such as "nan" or "inf".
            data = float(data)
        except ValueError:
            # Not a number: keep the cleaned text. Only ValueError is caught
            # so that unrelated errors (e.g. KeyboardInterrupt) still surface.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
def ach(report: str, save_name: str):
    """
    Convert the text of an ACH report into an Excel workbook.

    Every line containing both a contract number and a nine digit bank
    number is split into fixed-width slots and stored as one row. Every
    "BATCH #### TOTAL" line closes a batch: its number, payment date, lessor
    and total are recorded, and all rows collected since the previous batch
    line are stamped with that batch number, lessor and payment date.

    The workbook gets two sheets: "data" (one row per contract) and
    "Summary" (one row per batch).

    Args:
        report: full text of the report.
        save_name: path of the Excel file to write.

    Returns:
        The per-contract DataFrame that was written to the "data" sheet.

    Raises:
        IndexError: if a batch total line appears before any contract line,
            or within the last two lines of the report.
        ValueError: if a batch total cannot be parsed as a number, or if
            contract lines follow the last batch total line.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber": [],
        "CustomerName": [],
        "BankCode": [],
        "BankNumber": [],
        "AccountNumber": [],
        "Payment": [],
        "Batch": [],
        "Lessor": [],
        "PaymentDate": [],
    }
    columns = list(extracted_data_dict.keys())
    # Here the order of the columns (keys in dictionary) matters since they
    # need to be in the same order as the slot numbers. The last three
    # columns are not read from the contract line; they are filled in from
    # the batch total line.
    slot_columns = columns[:-3]
    batch_columns = ("Batch", "Lessor", "PaymentDate")
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "total": [],
    }
    data_extractor = create_line_divider([19, 57, 67, 82, 104])
    # Compiled once, outside the loop, since they are tested on every line
    contract_pattern = re.compile(contract_number_regex)
    bank_number_pattern = re.compile(r"\d{9}")
    batch_total_pattern = re.compile(r"BATCH \d{4} TOTAL")

    for index, line in enumerate(lines):
        # Check for a contract number and a bank number in the line
        if contract_pattern.search(line) and bank_number_pattern.search(line):
            # NOTE(review): the extractor converts numeric-looking slots to
            # float, so bank/account numbers would lose any leading zeros -
            # confirm this is acceptable downstream.
            for slot_num, column in enumerate(slot_columns):
                extracted_data_dict[column].append(data_extractor(slot_num, line))

        # This searches for a statement that looks like a batch number.
        # This sums the contracts by their lessor code. A feature requested
        # by cash apps.
        if batch_total_pattern.search(line):
            # Batch number is always in characters 96 to 101
            batch_num = line[96:101]
            # Payment date will be 2 lines below that, between characters
            # 114 and 125
            payment_date = lines[index + 2][114:125]
            # Lessor is just the first three numbers of the contract number
            lessor = extracted_data_dict["ContractNumber"][-1][0:3]
            # Total is a number given by the report for that batch. ',' is
            # removed so that it can be transformed into a float
            total = float(line[107:125].strip().replace(",", ""))

            batches["batch_num"].append(batch_num)
            batches["payment_date"].append(payment_date)
            batches["lessor"].append(lessor)
            batches["total"].append(total)

            # Any time there's a new batch, every contract row that does not
            # have batch values yet receives the values of this batch.
            row_count = len(extracted_data_dict["BankCode"])
            for column, value in zip(batch_columns, (batch_num, lessor, payment_date)):
                missing = row_count - len(extracted_data_dict[column])
                extracted_data_dict[column].extend([value] * missing)

    # Rows after the last batch total never received batch values, which
    # would leave the columns with unequal lengths. Fail with a clear message
    # instead of pandas' generic length-mismatch ValueError.
    unbatched = len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])
    if unbatched > 0:
        raise ValueError(
            f"{unbatched} contract line(s) found after the last batch total line; "
            "the report appears to be incomplete"
        )

    # Now the dictionary lists are all equal lengths and we can create a dataframe
    dataframe = pd.DataFrame(extracted_data_dict)
    # We're creating two sheets: data & summary, so we need to open an excel writer.
    # This also helps with a bug caused by larger dataframes
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        # The batches dictionary is converted to a dataframe and added as its own sheet
        pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary")
    return dataframe
# Sample ACH report files for manual runs of this script; only r2 is processed.
r1 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.05.27_ACH_C"
r2 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.06.03_ACH_C"

# Guarded so that importing this module does not read the sample file or
# write a workbook; running the file as a script behaves as before.
if __name__ == "__main__":
    # Undecodable bytes are replaced rather than raising a decode error
    with open(r2, errors="replace") as ifile:
        report = ifile.read()
    ach(report, "test_ach_0613.xlsx")