Fixed issue with ACH (batch count error), added pbp invoice # regex, rewrote lockbox

v3.1
Griffiths Lott 4 years ago
parent df96574a98
commit d01fdde480
  1. 112
      2022.05.04_MINV_C
  2. 92
      ILExtract.py
  3. 142
      ManualInvoice.py
  4. 110
      ach_fix.py
  5. 168
      lbf.py
  6. 1
      main.py
  7. 2
      mainWindow_new.py
  8. BIN
      man_inv_test.xlsx
  9. BIN
      test_ach_0613.xlsx
  10. BIN
      test_lb_0510.xlsx
  11. BIN
      test_lb_0511.xlsx
  12. BIN
      test_lb_0613.xlsx

@ -0,0 +1,112 @@
DAILY.MANUAL.INVOICE
CONTRACTS THAT WERE NOT INVOICED
PAGE 04-26-22 1
CHRG BUSINESS
CONTRACT.NO........ UATB.OIC.DUE RENTAL DUE......... UATB.IDS.OIC.PAYME TYPEM..... OUTSTANDING BALANCE.... SEGMENT. BOOKING.DATE BRANCH
100-2453558-003 05/09/2022 183.71 0.00 MISC 201.16 001.000 03/09/2018 9
100-2453558-003 04/09/2022 183.71 180.67 MISC 20.49 001.000 03/09/2018 9
100-2453558-003 03/09/2022 183.71 183.71 MISC 17.45 001.000 03/09/2018 9
100-1665517-003 05/15/2022 412.97 0.00 MISC 438.78 001.000 10/31/2014 9
100-4850431-001 05/10/2022 411.80 0.00 MISC 441.21 001.000 12/10/2018 3
100-4462739-001 04/18/2022 157.08 0.00 RENT 157.08 001.000 06/18/2018 9
100-4850431-001 04/10/2022 411.80 0.00 MISC 441.21 001.000 12/10/2018 3
100-3500858-001 05/12/2022 262.37 0.00 MISC 279.42 001.000 04/13/2016 9
100-3725849-003 05/13/2022 559.32 0.00 MISC 612.45 001.000 10/19/2017 9
100-3500858-001 04/12/2022 262.37 0.00 MISC 279.42 001.000 04/13/2016 9
047-2580598-001 04/05/2022 0.00 0.00 MISC 72.53 001.000 03/06/2014 9
100-3725849-003 03/13/2022 559.32 559.32 MISC 53.13 001.000 10/19/2017 9
100-4566489-001 04/14/2022 0.00 354.04 MISC 2.25 001.000 06/14/2018 9
100-4566489-001 05/14/2022 0.00 328.00 MISC 28.29 001.000 06/14/2018 9
100-5382471-001 04/15/2022 1,128.00 0.00 MISC 1,228.11 001.000 10/09/2019 9
100-5382471-001 05/15/2022 1,128.00 0.00 MISC 1,228.11 001.000 10/09/2019 9
100-9723689-001 04/20/2022 0.00 0.00 RENT 571.58 001.000 04/20/2022 10
100-9723689-001 04/20/2022 0.00 0.00 RENT 571.58 001.000 04/20/2022 10
100-7219911-001 04/20/2022 0.00 0.00 RENT 813.08 001.000 04/20/2022 9
100-1354567-002 05/25/2022 170.00 0.00 RENT 170.00 001.000 05/31/2016 9
100-6651721-001 07/14/2021 0.00 761.00 MISC 53.27 001.000 07/14/2021 9
100-2081987-008 05/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9
100-2139037-002 05/20/2022 105.00 0.00 MISC 111.67 001.000 03/20/2017 9
100-3725849-003 04/13/2022 559.32 440.68 MISC 171.77 001.000 10/19/2017 9
100-3344078-002 05/01/2022 -28.53 18.47 RENT 63.00 001.000 10/01/2020 9
100-2081987-008 03/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9
100-1354567-002 04/25/2022 170.00 0.00 RENT 170.00 001.000 05/31/2016 9
100-2081987-008 04/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9
100-2081987-008 02/25/2022 407.15 407.15 MISC 32.58 001.000 10/02/2017 9
100-3876959-007 04/21/2022 61.07 0.00 RENT 61.07 001.000 06/21/2018 9
100-1637209-005 05/20/2022 2,023.20 0.00 RENT 2,023.20 001.000 04/25/2022 9
100-7146771-001 04/20/2022 183.28 167.00 RENT 16.28 001.000 04/25/2022 3
100-7146771-001 05/20/2022 183.28 0.00 RENT 183.28 001.000 04/25/2022 3
100-7045691-001 05/20/2022 244.57 0.00 RENT 244.57 001.000 04/25/2022 3
100-7059671-001 05/20/2022 60.00 0.00 MISC 64.20 001.000 04/25/2022 3
100-7237601-001 04/20/2022 0.00 0.00 RENT 34,192.91 001.000 04/25/2022 3
100-7242461-001 05/20/2022 57.00 0.00 MISC 60.99 001.000 04/25/2022 9
100-7178461-001 05/20/2022 197.45 0.00 MISC 209.30 001.000 04/25/2022 3
100-2611389-007 05/20/2022 171.76 0.00 RENT 171.76 001.000 04/25/2022 3
100-7037791-001 05/01/2022 444.00 0.00 MISC 478.41 001.000 04/25/2022 9
100-7203371-001 05/20/2022 1,566.40 0.00 RENT 1,566.40 001.000 04/25/2022 3
100-6630017-005 05/01/2022 0.00 178.55 MISC 0.01 001.000 04/25/2022 3
100-6738611-001 04/20/2022 0.00 0.00 RENT 4,545.94 001.000 04/25/2022 3
100-6738611-001 04/25/2022 0.00 0.00 RENT 4,545.94 001.000 04/25/2022 3
100-7052571-001 05/14/2022 255.87 0.00 MISC 278.90 001.000 04/25/2022 9
100-1011756-004 05/20/2022 1,001.64 0.00 MISC 1,081.77 001.000 04/25/2022 9
100-6849836-001 05/20/2022 1,077.47 0.00 RENT 1,077.47 001.000 04/25/2022 3
100-3492758-003 05/15/2022 312.41 0.00 RENT 312.41 001.000 04/25/2022 9
100-7156851-001 05/20/2022 150.00 0.00 MISC 159.00 001.000 04/25/2022 3
100-7232561-001 05/20/2022 113.60 0.00 MISC 122.12 001.000 04/25/2022 9
100-3876959-007 05/21/2022 61.07 0.00 RENT 61.07 001.000 06/21/2018 9
100-5382931-003 05/20/2022 146.69 0.00 RENT 146.69 001.000 04/26/2022 3
100-5722341-003 05/20/2022 170.00 0.00 MISC 181.90 001.000 04/26/2022 3
100-7150721-001 04/20/2022 174.96 0.00 RENT 174.96 001.000 04/26/2022 3
100-7150721-001 05/20/2022 174.96 0.00 RENT 174.96 001.000 04/26/2022 3
100-7165521-001 05/20/2022 1,417.88 0.00 RENT 1,417.88 001.000 04/26/2022 3
100-7227921-001 05/20/2022 64.00 0.00 MISC 69.28 001.000 04/26/2022 3
100-4858739-002 05/15/2022 208.00 0.00 MISC 225.16 001.000 04/26/2022 3
100-7100621-001 05/13/2022 880.10 0.00 MISC 954.90 001.000 04/26/2022 9
100-9725556-001 04/25/2022 0.00 0.00 RENT 600.77 001.000 04/26/2022 10
100-9725556-001 04/26/2022 0.00 0.00 RENT 600.77 001.000 04/26/2022 10
100-7209051-001 05/20/2022 1,652.01 0.00 RENT 1,652.01 001.000 04/26/2022 3
100-9660710-001 05/09/2022 174.75 0.00 RENT 174.75 001.000 06/09/2021 10
100-5329301-002 04/20/2022 0.00 0.00 RENT 263.44 001.000 04/26/2022 3
100-7087121-001 05/16/2022 3,294.46 1,125.58 RENT 3,294.46 001.000 02/16/2022 12
100-6602681-003 04/25/2022 0.00 0.00 RENT 478.00 001.000 04/26/2022 3
100-6602681-003 04/25/2022 0.00 0.00 RENT 478.00 001.000 04/26/2022 3
100-6754131-001 05/20/2022 747.75 0.00 RENT 747.75 001.000 04/26/2022 3
100-7214111-001 05/21/2022 542.97 0.00 RENT 542.97 001.000 04/26/2022 9
101-6898811-001 04/20/2022 0.00 0.00 RENT 15,035.55 001.000 04/26/2022 3
100-2406418-003 05/20/2022 200.00 0.00 MISC 219.00 001.000 04/26/2022 9
100-6943901-002 05/16/2022 236.40 0.00 MISC 257.67 001.000 04/26/2022 9
100-1623380-901 05/15/2022 1,769.11 0.00 RENT 1,769.11 001.000 04/26/2022 10
100-7107941-001 05/20/2022 1,038.95 0.00 RENT 1,038.95 001.000 02/23/2022 3
100-7031531-001 05/20/2022 120.00 0.00 MISC 130.92 001.000 04/26/2022 3
100-3630389-005 05/20/2022 168.00 0.00 MISC 181.86 001.000 04/26/2022 3
100-7174941-002 05/20/2022 1,667.38 0.00 MISC 1,804.93 001.000 04/26/2022 9
100-5204521-002 05/25/2022 3,222.20 0.00 RENT 3,222.20 001.000 04/26/2022 12
100-7241571-001 05/20/2022 55.00 0.00 MISC 59.54 001.000 04/26/2022 3
100-7182731-001 04/20/2022 0.00 0.00 RENT 1,025.37 001.000 04/26/2022 3
100-7182731-001 04/26/2022 0.00 0.00 RENT 1,025.37 001.000 04/26/2022 3
100-9726258-001 04/25/2022 0.00 0.00 RENT 255.97 001.000 04/26/2022 10
100-9726258-001 04/26/2022 0.00 0.00 RENT 255.97 001.000 04/26/2022 10
100-7220301-001 04/20/2022 0.00 0.00 RENT 1,238.00 001.000 04/26/2022 3
100-7151521-001 05/15/2022 94.00 0.00 MISC 102.46 001.000 04/26/2022 9
100-7237751-001 05/25/2022 2.00 101.65 MISC 2.14 001.000 04/26/2022 9
100-3876959-005 03/25/2022 0.00 0.00 RENT 60.74 001.000 09/27/2017 9
100-3910629-001 03/25/2022 0.00 0.00 RENT 245.81 001.000 03/30/2017 9
100-3876959-005 04/25/2022 0.00 0.00 RENT 60.74 001.000 09/27/2017 9
100-3910629-001 04/25/2022 0.00 0.00 RENT 245.81 001.000 03/30/2017 9
104-4687809-001 04/25/2022 -2,161.94 140.00 MISC 9.80 001.000 08/29/2018 9
100-3964329-001 04/28/2022 318.13 0.00 MISC 340.40 001.000 03/31/2017 9
100-3964329-001 03/28/2022 318.13 0.00 MISC 340.40 001.000 03/31/2017 9
100-1670517-003 04/16/2022 0.00 0.00 RENT 165.00 001.000 09/16/2021 3
100-4945021-001 05/15/2022 0.00 0.00 RENT 1,357.77 001.000 02/15/2019 3
100-3694757-001 05/01/2022 298.00 0.00 MISC 324.45 001.000 09/02/2016 9
100-3694757-001 04/01/2022 298.00 0.00 MISC 324.45 001.000 09/02/2016 9
100-6651721-002 07/25/2021 0.00 761.00 MISC 53.27 001.000 07/27/2021 9
100-6814061-001 04/06/2022 0.00 169.00 RENT 15.63 001.000 04/06/2022 3
100-7170651-001 04/07/2022 0.00 99.00 RENT 9.50 001.000 04/07/2022 3
100-2446458-002 04/06/2022 865.00 859.26 MISC 66.29 001.000 12/06/2016 9
100-2446458-002 05/06/2022 865.00 0.00 MISC 925.55 001.000 12/06/2016 9
102 records listed

@ -138,7 +138,7 @@ def ach(report: str, save_name: str):
"batch_num": [],
"payment_date": [],
"lessor": [],
"count": [],
#"count": [],
"total": []
}
@ -163,7 +163,8 @@ def ach(report: str, save_name: str):
batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
# Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float
batches["total"].append(float(line[1][107:125].strip().replace(",", "")))
batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
#print(f"{line[0]+6} | {lines[line[0]+6][107:125]}\n{lines[line[0]+6]}")
#batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
# Any time there's a new batch we need to add this data to the dictionary up to the current place
# So we iterate over the number of contracts and add in the newest value for each that don't have one of these values already
[extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))]
@ -397,54 +398,49 @@ def net_invest_trial_balance(report: str, save_name: str):
def lockbox(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
"CustomerName" : [],
"PaymentDate" : [],
"InvoiceNumber" : [],
"CheckNumber" : [],
"InvoicePayment" : [],
"ContractNumber" : [],
"ContractPayment" : [],
"SEQ" : [],
"PYMT DATE" : [],
"INV NUM" : [],
"CHECK NUMBER" : [],
"PAYMENT AMOUNT" : [],
"NOTE" : [],
"IL SEQ" : [],
"CONTRACT NUM" : [],
"IL PAYMENT AMOUNT" : [],
"CUST NAME" : [],
}
# These are lists of the dictionary columns/keys and the data slots in which
# that data can be found in the report. this way we can iterate through them
# While extracting data
bank_payment_records = [list(extracted_data_dict.keys())[1:5],[1,2,3,4]]
infolease_payment_records = [list(extracted_data_dict.keys())[5:],[7,8]]
# Below are the regular expressions used to find relevant data lines
full_line = "\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1"
contract_only_line = "\s{90}\d.{7}1\d{2}-"
cust_name_line = "\s{98}.{28}\D*"
# The data extractor allows us to extract data from the report using slots
# Slots are ranges of character denote by the list feed into the creation function
data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
columns = list(extracted_data_dict.keys())
data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
for line in enumerate(lines):
# We can skip empty lines
if len(line[1]) == 0: continue
# First we should check if there is a full line of data (defined by regex)
if re.search(full_line, line[1]):
# If this is true then we can iterate through the lists we created earlier and append the data to our dict
for k in range(0,len(bank_payment_records[0])):
extracted_data_dict[bank_payment_records[0][k]].append(data_extractor(bank_payment_records[1][k],line[1]))
for k in range(0,len(infolease_payment_records[0])):
extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1]))
# Otherwise we should check if this is a line with only contract data
elif re.search(contract_only_line,line[1]):
# If that's the case we can use the 'bank payment data' from the previous entry since it should apply to his contract
for k in range(0,len(bank_payment_records[0])):
extracted_data_dict[bank_payment_records[0][k]].append(extracted_data_dict[bank_payment_records[0][k]][-1])
for k in range(0,len(infolease_payment_records[0])):
extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1]))
# If it doesn't hit either of these criteria then continue since it's irrelevant data
else: continue
i = 1
# used to track how many lines below the current line we're looking for the customer name
# keep moving down a line and checking for a customer name
# Customer name typically happens 1 line under data but can be 13 lines if cut off by page end
while re.search(cust_name_line,lines[line[0]+i]) == None:
match = False
# Try to find the first SEQ # & a contract payment date e.i. ' 197 05/10/2022'
if re.match("(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}", line[1]):
match = True
# Add all of the data points except customer name
[extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns)-1)]
# Check to see if this line contains only an infolease payment
# Some times there are multiple infolease payments for a single bank record
elif re.search(contract_number_regex, line[1]) != None:
match = True
# If there is then we can add the same data as the previous complete line
[extracted_data_dict[columns[c]].append(extracted_data_dict[columns[c]][-1]) for c in range(0,6)]
# Then add the new data for the infolease contract
[extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(6,len(columns)-1)]
# If we had a match we need a customer name to associate with it
# Sometimes these can appear on the next page, hence the while loop searching for a match
if match:
# We can tell the cust name will be on the next page if the word "PAGE" appears three lines under the current line
# And the next line is blank
if (lines[line[0]+1].strip() == "") & (lines[line[0]+3].find("PAGE") != -1):
i = 0
# Look for a bunch of whitespace then some writing
while not re.match("\s{98}.{34}", lines[line[0]+i]):
i +=1
# Once it hits, add the name to the dict
extracted_data_dict["CustomerName"].append(data_extractor(7,lines[line[0]+i]))
# Once we find it add the cust name to the dict (it's the only thing on the line)
extracted_data_dict["CUST NAME"].append(lines[line[0]+i].strip())
# if the condition above isnt met then the cust name is on the next line (even if that line is blank)
else:
extracted_data_dict["CUST NAME"].append(lines[line[0]+1].strip())
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False)
return dataframe
@ -496,7 +492,7 @@ def payment_transactions(report: str, save_name: str):
for line in enumerate(lines):
slot1 = data_extractor(1,line[1],False)
if type(slot1) != str : continue
if re.search(contract_number_regex, slot1) != None:
if (re.search(contract_number_regex, slot1) or re.search("\d{3}\.\d{4}\.\d{4}", slot1))!= None:
[extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0,len(columns)-3)]
tnum_match = re.search(transaction_num_regex, lines[line[0]+1])
if tnum_match:

@ -0,0 +1,142 @@
import os
import pandas as pd
from datetime import datetime as dt, timedelta
import re
from pathlib import Path
import time
import numpy as np
from pprint import pprint as prt
def pfd(df: pd.DataFrame):
    """Print df with pandas' row and column display limits lifted."""
    # Options are given as alternating (name, value) pairs; None removes the limit
    unlimited = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*unlimited):
        print(df)
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor for fixed-width lines.
    Breakage list defines the split points that will be used for the line.
    N split points give N + 1 slots; the last slot runs to the end of the line.
    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20
    Slot 3 would give everything from character 30 onwards
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        The slot is stripped of surrounding whitespace and every ',' is removed.
        If what is left parses as a number a float is returned, otherwise the
        cleaned string is returned.
        """
        # We can't have a slot number higher than the number of slots
        assert slot_num < len(breakage_list) + 1, \
            f"slot {slot_num} requested but only {len(breakage_list) + 1} slots exist"
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not a number: keep the cleaned text. Only ValueError is caught so that
            # interrupts and genuine programming errors are not silently swallowed.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
def minv(report: str, save_name: str, report_date: str = '04/26/2022'):
    """
    Extracts the contract lines of a manual invoice report, filters them and
    saves the result to an Excel file.

    Every line of the report that contains a contract number is split into
    fixed-width slots, one slot per output column. A row is kept when its
    RentalDue is 0 and either its BookingDate differs from report_date or its
    OutstandBalance is above 100.

    report      : full text of the report
    save_name   : path of the Excel file to write
    report_date : booking date that counts as "today", written the way the
                  report prints dates (e.g. '04/26/2022'). Defaults to the
                  date that used to be hard-coded in the filter.

    Returns the filtered DataFrame.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
    extracted_data_dict = {
        "ContractNumber" : [],
        "UTAB_OIC_DUE" : [],
        "RentalDue" : [],
        "UTAB_OIC_PYMT" : [],
        "ChargeType" : [],
        "OutstandBalance" : [],
        "BizSegment" : [],
        "BookingDate" : [],
        "Branch" : [],
    }
    columns = list(extracted_data_dict.keys())
    for line in lines:
        # Only lines carrying a contract number hold data
        if re.search(contract_number_regex, line) is None:
            continue
        # The columns are declared in slot order, so the column position is the slot number
        for slot, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot, line, debug=False))
    # All the list lengths need to be the same so if anything was missed it will fail to build
    dataframe = pd.DataFrame(extracted_data_dict)
    # ( bookdate != today & rent = 0 ) OR (outstanding > 100 & rent = 0)
    no_rent_due = dataframe["RentalDue"] == 0
    booked_another_day = dataframe["BookingDate"] != report_date
    large_balance = dataframe["OutstandBalance"] > 100
    filtered = dataframe[(booked_another_day & no_rent_due) | (no_rent_due & large_balance)]
    filtered.to_excel(save_name, index=False)
    return filtered
# Contract numbers that the minv() result is compared against at the bottom of
# this script: contracts minv() returns that are missing from this list are
# reported as "extra", contracts in this list that minv() does not return are
# reported as "not included".
# NOTE(review): presumably the contract list produced by the current process - confirm.
current_output = [
'100-1011756-004',
'100-1354567-002',
'100-1637209-005',
'100-1665517-003',
'100-1670517-003',
'100-2081987-008',
'100-2139037-002',
'100-2446458-002',
'100-2453558-003',
'100-2611389-007',
'100-3492758-003',
'100-3500858-001',
'100-3694757-001',
'100-3725849-003',
'100-3876959-007',
'100-3910629-001',
'100-3964329-001',
'100-4462739-001',
'100-4850431-001',
'100-4945021-001',
'100-5382471-001',
'100-6738611-001',
'100-6849836-001',
'100-7037791-001',
'100-7045691-001',
'100-7052571-001',
'100-7059671-001',
'100-7087121-001',
'100-7107941-001',
'100-7146771-001',
'100-7156851-001',
'100-7178461-001',
'100-7203371-001',
'100-7219911-001',
'100-7232561-001',
'100-7237601-001',
'100-7242461-001',
'100-9660710-001',
'100-9723689-001',
]
# Pattern for contract numbers such as 100-2453558-003.
# Raw string so the regex escapes are not read as (invalid) string escapes.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"

# Run the extract against the sample report (path is relative to the working directory)
with open("2022.05.04_MINV_C", errors="replace") as ifile:
    report = ifile.read()
fin_df = minv(report, "man_inv_test.xlsx")
pfd(fin_df)
il_contracts = fin_df.ContractNumber.to_list()
prt(il_contracts)

# Contracts the extract produced that are not in current_output, and the other way around
extra_contracts = [c for c in il_contracts if c not in current_output]
not_included = [c for c in current_output if c not in il_contracts]

print("\nExtra Contracts:")
prt(extra_contracts)
print("Not Included Contracts:")
prt(not_included)
# NOTE(review): this compares the two lists element by element, so ordering and
# repeated contract numbers count as a mismatch even when both lists are empty above.
print(f"MATCHING CONTRACTS: {il_contracts == current_output}")
print(f"Current # contract {len(current_output)} | ILE Processed Contracts: {len(il_contracts)}")
print(f"# Extra contracts included: {len(extra_contracts)} | # Contracts not included: {len(not_included)}")

@ -0,0 +1,110 @@
import os
import pandas as pd
from datetime import datetime as dt, timedelta
import sys, getopt
import re
from pathlib import Path
import time
import numpy as np
contract_number_regex = "\d{3}-\d{7}-\d{3}"
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor for fixed-width lines.
    Breakage list defines the split points that will be used for the line.
    N split points give N + 1 slots; the last slot runs to the end of the line.
    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20
    Slot 3 would give everything from character 30 onwards
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        The slot is stripped of surrounding whitespace and every ',' is removed.
        If what is left parses as a number a float is returned, otherwise the
        cleaned string is returned.
        """
        # We can't have a slot number higher than the number of slots
        assert slot_num < len(breakage_list) + 1, \
            f"slot {slot_num} requested but only {len(breakage_list) + 1} slots exist"
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not a number: keep the cleaned text. Only ValueError is caught so that
            # interrupts and genuine programming errors are not silently swallowed.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
def ach(report: str, save_name: str):
    """
    Extracts the contract payments and the batch totals from an ACH report and
    saves them to an Excel workbook.

    A line is a payment row when it contains both a contract number and a nine
    digit bank number; its first six columns are read from fixed-width slots.
    A line matching 'BATCH nnnn TOTAL' closes a batch: the batch number, the
    payment date (two lines further down) and the lessor (first three
    characters of the most recent contract number) are then written to every
    payment row that does not have them yet.

    report    : full text of the report
    save_name : path of the Excel workbook to write. It gets a "data" sheet
                with the payment rows and a "Summary" sheet with one row per batch.

    Returns the DataFrame of payment rows.
    Raises IndexError when a batch total line appears before any payment row or
    within the last two lines of the report, and ValueError (from pandas) when
    payment rows follow the last batch total line.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber" : [],
        "CustomerName" : [],
        "BankCode" : [],
        "BankNumber": [],
        "AccountNumber" : [],
        "Payment" : [],
        "Batch": [],
        "Lessor": [],
        "PaymentDate": [],
    }
    columns = list(extracted_data_dict.keys())
    # The last three columns are batch level values, they are filled in once the batch total is reached
    slot_columns = columns[:-3]
    # One entry per batch. The per-batch record count is not collected in this version.
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "total": []
    }
    # Which batches list feeds which payment row column
    batch_sources = (("Batch", "batch_num"), ("Lessor", "lessor"), ("PaymentDate", "payment_date"))
    data_extractor = create_line_divider([19,57,67,82,104])
    contract_number_pattern = re.compile(contract_number_regex)
    bank_number_pattern = re.compile(r"\d{9}")
    batch_num_pattern = re.compile(r"BATCH \d{4} TOTAL")
    for index, line in enumerate(lines):
        # Check for a contract number and a bank number in the line
        if contract_number_pattern.search(line) and bank_number_pattern.search(line):
            # Here the order of the columns (keys in dictionary) matters since they need
            # to be in the same order as the slot numbers
            for slot, column in enumerate(slot_columns):
                extracted_data_dict[column].append(data_extractor(slot, line))
        # This searches for a statement that looks like a batch number
        # This sums the contracts by their lessor code. A feature requested by cash apps
        if batch_num_pattern.search(line):
            # Batch number is always in characters 96 to 101
            batches["batch_num"].append(line[96:101])
            # Payment date will be 2 lines below that between characters 114 and 125
            batches["payment_date"].append(lines[index + 2][114:125])
            # Lessor is just the first three numbers of the contract number
            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
            # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float
            batches["total"].append(float(line[107:125].strip().replace(",", "")))
            # Any time there's a new batch we need to add this data to the dictionary up to the current place
            # So every payment row that has no batch values yet gets the newest ones
            row_count = len(extracted_data_dict["BankCode"])
            for column, source in batch_sources:
                values = extracted_data_dict[column]
                values.extend([batches[source][-1]] * (row_count - len(values)))
    # Now the dictionary lists should all be equal lengths and we can create a dataframe
    dataframe = pd.DataFrame(extracted_data_dict)
    # We're creating two sheets: data & summary so we need to open an excel writer
    # This also helps with a bug caused by larger dataframes
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        # The batches dictionary is converted to a dataframe and added as its own sheet
        pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary")
    return dataframe
# Sample ACH reports, only the second one is processed below
r1 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.05.27_ACH_C"
r2 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.06.03_ACH_C"
# Undecodable bytes are replaced rather than raising
report = Path(r2).read_text(errors="replace")
ach(report, "test_ach_0613.xlsx")

168
lbf.py

@ -0,0 +1,168 @@
import os
import pandas as pd
from datetime import datetime as dt, timedelta
import sys, getopt
import re
from pathlib import Path
import time
import numpy as np
from pprint import pprint as prt
contract_number_regex = "\d{3}-\d{7}-\d{3}"
def dict_lens(dictionary):
    """Print one 'key : length' line for every entry of dictionary."""
    for name, values in dictionary.items():
        print(f"{name} : {len(values)}")
def create_line_divider(breakage_list: list):
    """
    This allows for the creation of a custom data extractor for fixed-width lines.
    Breakage list defines the split points that will be used for the line.
    N split points give N + 1 slots; the last slot runs to the end of the line.
    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20
    Slot 3 would give everything from character 30 onwards
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        The slot is stripped of surrounding whitespace and every ',' is removed.
        If what is left parses as a number a float is returned, otherwise the
        cleaned string is returned.
        """
        # We can't have a slot number higher than the number of slots
        assert slot_num < len(breakage_list) + 1, \
            f"slot {slot_num} requested but only {len(breakage_list) + 1} slots exist"
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not a number: keep the cleaned text. Only ValueError is caught so that
            # interrupts and genuine programming errors are not silently swallowed.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
def lockbox(report: str, save_name: str):
    """
    Extracts the payments of a lockbox report and saves them to an Excel file.

    Two kinds of lines produce a row:
      * a full line, which holds the bank payment values and a contract payment
      * a contract only line, which holds another contract payment for the
        previous bank payment; the bank payment values are copied from the
        previous row
    For every row the customer name is read from the first following line that
    matches the customer name pattern.

    report    : full text of the report
    save_name : path of the Excel file to write

    Returns the DataFrame that was written.
    Raises IndexError when a contract only line appears before any full line,
    or when no customer name line follows a row before the report ends.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "CustomerName" : [],
        "PaymentDate" : [],
        "InvoiceNumber" : [],
        "CheckNumber" : [],
        "InvoicePayment" : [],
        "ContractNumber" : [],
        "ContractPayment" : [],
    }
    # Each column is paired with the data slot in which that data can be
    # found in the report, this way we can iterate through them while extracting data
    bank_payment_records = [
        ("PaymentDate", 1),
        ("InvoiceNumber", 2),
        ("CheckNumber", 3),
        ("InvoicePayment", 4),
    ]
    infolease_payment_records = [
        ("ContractNumber", 7),
        ("ContractPayment", 8),
    ]
    # Below are the regular expressions used to find relevant data lines
    full_line = re.compile(r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1")
    contract_only_line = re.compile(r"\s{90}\d.{7}1\d{2}-")
    cust_name_line = re.compile(r"\s{98}.{28}\D*")
    # The data extractor allows us to extract data from the report using slots
    # Slots are ranges of characters denoted by the list fed into the creation function
    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
    for index, line in enumerate(lines):
        # We can skip empty lines
        if not line:
            continue
        # First we should check if there is a full line of data (defined by regex)
        if full_line.search(line):
            for column, slot in bank_payment_records:
                extracted_data_dict[column].append(data_extractor(slot, line))
        # Otherwise we should check if this is a line with only contract data
        elif contract_only_line.search(line):
            # If that's the case we can use the 'bank payment data' from the previous entry since it should apply to this contract
            for column, _ in bank_payment_records:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
        # If it doesn't hit either of these criteria then continue since it's irrelevant data
        else:
            continue
        # Both kinds of line carry their own contract payment
        for column, slot in infolease_payment_records:
            extracted_data_dict[column].append(data_extractor(slot, line))
        # Tracks how many lines below the current line we're looking for the customer name
        # Customer name typically happens 1 line under data but can be 13 lines if cut off by page end
        offset = 1
        while cust_name_line.search(lines[index + offset]) is None:
            offset += 1
        # Once it hits, add the name to the dict
        extracted_data_dict["CustomerName"].append(data_extractor(7, lines[index + offset]))
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
def lb2(report:str, save_name:str):
    """
    Extracts the payments of a lockbox report and saves them to an Excel file.

    Two kinds of lines produce a row:
      * a line starting with a SEQ number and a payment date, from which every
        column except the customer name is read
      * any other line containing a contract number, which is an additional
        infolease payment for the previous bank record; the first six columns
        are copied from the previous row and the rest are read from the line
    The customer name is normally the line directly below the row. When that
    line is blank and the word "PAGE" appears three lines below the row, the
    name is searched for further down instead.

    Lines past the end of the report are treated as blank, so a row at the very
    end of the report gets an empty customer name instead of raising.

    report    : full text of the report
    save_name : path of the Excel file to write

    Returns the DataFrame that was written.
    Raises IndexError when a contract only line appears before any full line.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "SEQ" : [],
        "PYMT DATE" : [],
        "INV NUM" : [],
        "CHECK NUMBER" : [],
        "PAYMENT AMOUNT" : [],
        "NOTE" : [],
        "IL SEQ" : [],
        "CONTRACT NUM" : [],
        "IL PAYMENT AMOUNT" : [],
        "CUST NAME" : [],
    }
    columns = list(extracted_data_dict.keys())
    # Every column except the customer name, in slot order
    data_columns = columns[:-1]
    data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
    # Finds the first SEQ # & a contract payment date e.g. ' 197 05/10/2022'
    full_line = re.compile(r"(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}")
    contract_number_pattern = re.compile(contract_number_regex)
    # A bunch of whitespace then some writing
    cust_name_line = re.compile(r"\s{98}.{34}")

    def line_at(position):
        # Lines beyond the end of the report count as blank
        return lines[position] if position < len(lines) else ""

    for index, line in enumerate(lines):
        if full_line.match(line):
            # Add all of the data points except customer name
            for slot, column in enumerate(data_columns):
                extracted_data_dict[column].append(data_extractor(slot, line, debug=False))
        # Check to see if this line contains only an infolease payment
        # Sometimes there are multiple infolease payments for a single bank record
        elif contract_number_pattern.search(line):
            # If there is then we can add the same data as the previous complete line
            for column in columns[:6]:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
            # Then add the new data for the infolease contract
            for slot in range(6, len(data_columns)):
                extracted_data_dict[columns[slot]].append(data_extractor(slot, line, debug=False))
        else:
            continue
        # We had a match so we need a customer name to associate with it
        # We can tell the cust name will be on the next page if the next line is blank
        # and the word "PAGE" appears three lines under the current line.
        # 'and' stops at the first failed check, so the line three below is only looked at when needed
        if line_at(index + 1).strip() == "" and "PAGE" in line_at(index + 3):
            # Sometimes the name appears on the next page, hence the loop searching for a match
            name_index = index
            while name_index < len(lines) and not cust_name_line.match(lines[name_index]):
                name_index += 1
            # Once we find it add the cust name to the dict (it's the only thing on the line)
            extracted_data_dict["CUST NAME"].append(line_at(name_index).strip())
        # if the condition above isn't met then the cust name is on the next line (even if that line is blank)
        else:
            extracted_data_dict["CUST NAME"].append(line_at(index + 1).strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
# Sample lockbox reports and the workbook each one is extracted into
r1 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.10_LOCKBOX_094_C"
r2 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.11_LOCKBOX_094_C"
for report_path, workbook in ((r1, "test_lb_0510.xlsx"), (r2, "test_lb_0511.xlsx")):
    # Undecodable bytes are replaced rather than raising
    report = Path(report_path).read_text(errors="replace")
    lb2(report, workbook)

@ -23,7 +23,6 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.processReportButton.clicked.connect(self.process_selection)
self.openReportButton.clicked.connect(self.to_clipboard)
def getfile(self):
inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file')
self.inputFileLE.setText(inFile[0])

@ -21,7 +21,7 @@ class Ui_MainWindow(object):
icon2 = QtGui.QIcon()
icon2.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
iconCopy = QtGui.QIcon()
iconCopy.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
iconCopy.addPixmap(QtGui.QPixmap("copy.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
iconProcess = QtGui.QIcon()
iconProcess.addPixmap(QtGui.QPixmap("process.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
MainWindow.setWindowIcon(icon)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.
Loading…
Cancel
Save