diff --git a/2022.05.04_MINV_C b/2022.05.04_MINV_C new file mode 100644 index 0000000..ae1bfab --- /dev/null +++ b/2022.05.04_MINV_C @@ -0,0 +1,112 @@ + + + DAILY.MANUAL.INVOICE + CONTRACTS THAT WERE NOT INVOICED + PAGE 04-26-22 1 + + CHRG BUSINESS +CONTRACT.NO........ UATB.OIC.DUE RENTAL DUE......... UATB.IDS.OIC.PAYME TYPEM..... OUTSTANDING BALANCE.... SEGMENT. BOOKING.DATE BRANCH + +100-2453558-003 05/09/2022 183.71 0.00 MISC 201.16 001.000 03/09/2018 9 +100-2453558-003 04/09/2022 183.71 180.67 MISC 20.49 001.000 03/09/2018 9 +100-2453558-003 03/09/2022 183.71 183.71 MISC 17.45 001.000 03/09/2018 9 +100-1665517-003 05/15/2022 412.97 0.00 MISC 438.78 001.000 10/31/2014 9 +100-4850431-001 05/10/2022 411.80 0.00 MISC 441.21 001.000 12/10/2018 3 +100-4462739-001 04/18/2022 157.08 0.00 RENT 157.08 001.000 06/18/2018 9 +100-4850431-001 04/10/2022 411.80 0.00 MISC 441.21 001.000 12/10/2018 3 +100-3500858-001 05/12/2022 262.37 0.00 MISC 279.42 001.000 04/13/2016 9 +100-3725849-003 05/13/2022 559.32 0.00 MISC 612.45 001.000 10/19/2017 9 +100-3500858-001 04/12/2022 262.37 0.00 MISC 279.42 001.000 04/13/2016 9 +047-2580598-001 04/05/2022 0.00 0.00 MISC 72.53 001.000 03/06/2014 9 +100-3725849-003 03/13/2022 559.32 559.32 MISC 53.13 001.000 10/19/2017 9 +100-4566489-001 04/14/2022 0.00 354.04 MISC 2.25 001.000 06/14/2018 9 +100-4566489-001 05/14/2022 0.00 328.00 MISC 28.29 001.000 06/14/2018 9 +100-5382471-001 04/15/2022 1,128.00 0.00 MISC 1,228.11 001.000 10/09/2019 9 +100-5382471-001 05/15/2022 1,128.00 0.00 MISC 1,228.11 001.000 10/09/2019 9 +100-9723689-001 04/20/2022 0.00 0.00 RENT 571.58 001.000 04/20/2022 10 +100-9723689-001 04/20/2022 0.00 0.00 RENT 571.58 001.000 04/20/2022 10 +100-7219911-001 04/20/2022 0.00 0.00 RENT 813.08 001.000 04/20/2022 9 +100-1354567-002 05/25/2022 170.00 0.00 RENT 170.00 001.000 05/31/2016 9 +100-6651721-001 07/14/2021 0.00 761.00 MISC 53.27 001.000 07/14/2021 9 +100-2081987-008 05/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9 
+100-2139037-002 05/20/2022 105.00 0.00 MISC 111.67 001.000 03/20/2017 9 +100-3725849-003 04/13/2022 559.32 440.68 MISC 171.77 001.000 10/19/2017 9 +100-3344078-002 05/01/2022 -28.53 18.47 RENT 63.00 001.000 10/01/2020 9 +100-2081987-008 03/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9 +100-1354567-002 04/25/2022 170.00 0.00 RENT 170.00 001.000 05/31/2016 9 +100-2081987-008 04/25/2022 407.15 0.00 MISC 439.73 001.000 10/02/2017 9 +100-2081987-008 02/25/2022 407.15 407.15 MISC 32.58 001.000 10/02/2017 9 +100-3876959-007 04/21/2022 61.07 0.00 RENT 61.07 001.000 06/21/2018 9 +100-1637209-005 05/20/2022 2,023.20 0.00 RENT 2,023.20 001.000 04/25/2022 9 +100-7146771-001 04/20/2022 183.28 167.00 RENT 16.28 001.000 04/25/2022 3 +100-7146771-001 05/20/2022 183.28 0.00 RENT 183.28 001.000 04/25/2022 3 +100-7045691-001 05/20/2022 244.57 0.00 RENT 244.57 001.000 04/25/2022 3 +100-7059671-001 05/20/2022 60.00 0.00 MISC 64.20 001.000 04/25/2022 3 +100-7237601-001 04/20/2022 0.00 0.00 RENT 34,192.91 001.000 04/25/2022 3 +100-7242461-001 05/20/2022 57.00 0.00 MISC 60.99 001.000 04/25/2022 9 +100-7178461-001 05/20/2022 197.45 0.00 MISC 209.30 001.000 04/25/2022 3 +100-2611389-007 05/20/2022 171.76 0.00 RENT 171.76 001.000 04/25/2022 3 +100-7037791-001 05/01/2022 444.00 0.00 MISC 478.41 001.000 04/25/2022 9 +100-7203371-001 05/20/2022 1,566.40 0.00 RENT 1,566.40 001.000 04/25/2022 3 +100-6630017-005 05/01/2022 0.00 178.55 MISC 0.01 001.000 04/25/2022 3 +100-6738611-001 04/20/2022 0.00 0.00 RENT 4,545.94 001.000 04/25/2022 3 +100-6738611-001 04/25/2022 0.00 0.00 RENT 4,545.94 001.000 04/25/2022 3 +100-7052571-001 05/14/2022 255.87 0.00 MISC 278.90 001.000 04/25/2022 9 +100-1011756-004 05/20/2022 1,001.64 0.00 MISC 1,081.77 001.000 04/25/2022 9 +100-6849836-001 05/20/2022 1,077.47 0.00 RENT 1,077.47 001.000 04/25/2022 3 +100-3492758-003 05/15/2022 312.41 0.00 RENT 312.41 001.000 04/25/2022 9 +100-7156851-001 05/20/2022 150.00 0.00 MISC 159.00 001.000 04/25/2022 3 
+100-7232561-001 05/20/2022 113.60 0.00 MISC 122.12 001.000 04/25/2022 9 +100-3876959-007 05/21/2022 61.07 0.00 RENT 61.07 001.000 06/21/2018 9 +100-5382931-003 05/20/2022 146.69 0.00 RENT 146.69 001.000 04/26/2022 3 +100-5722341-003 05/20/2022 170.00 0.00 MISC 181.90 001.000 04/26/2022 3 +100-7150721-001 04/20/2022 174.96 0.00 RENT 174.96 001.000 04/26/2022 3 +100-7150721-001 05/20/2022 174.96 0.00 RENT 174.96 001.000 04/26/2022 3 +100-7165521-001 05/20/2022 1,417.88 0.00 RENT 1,417.88 001.000 04/26/2022 3 +100-7227921-001 05/20/2022 64.00 0.00 MISC 69.28 001.000 04/26/2022 3 +100-4858739-002 05/15/2022 208.00 0.00 MISC 225.16 001.000 04/26/2022 3 +100-7100621-001 05/13/2022 880.10 0.00 MISC 954.90 001.000 04/26/2022 9 +100-9725556-001 04/25/2022 0.00 0.00 RENT 600.77 001.000 04/26/2022 10 +100-9725556-001 04/26/2022 0.00 0.00 RENT 600.77 001.000 04/26/2022 10 +100-7209051-001 05/20/2022 1,652.01 0.00 RENT 1,652.01 001.000 04/26/2022 3 +100-9660710-001 05/09/2022 174.75 0.00 RENT 174.75 001.000 06/09/2021 10 +100-5329301-002 04/20/2022 0.00 0.00 RENT 263.44 001.000 04/26/2022 3 +100-7087121-001 05/16/2022 3,294.46 1,125.58 RENT 3,294.46 001.000 02/16/2022 12 +100-6602681-003 04/25/2022 0.00 0.00 RENT 478.00 001.000 04/26/2022 3 +100-6602681-003 04/25/2022 0.00 0.00 RENT 478.00 001.000 04/26/2022 3 +100-6754131-001 05/20/2022 747.75 0.00 RENT 747.75 001.000 04/26/2022 3 +100-7214111-001 05/21/2022 542.97 0.00 RENT 542.97 001.000 04/26/2022 9 +101-6898811-001 04/20/2022 0.00 0.00 RENT 15,035.55 001.000 04/26/2022 3 +100-2406418-003 05/20/2022 200.00 0.00 MISC 219.00 001.000 04/26/2022 9 +100-6943901-002 05/16/2022 236.40 0.00 MISC 257.67 001.000 04/26/2022 9 +100-1623380-901 05/15/2022 1,769.11 0.00 RENT 1,769.11 001.000 04/26/2022 10 +100-7107941-001 05/20/2022 1,038.95 0.00 RENT 1,038.95 001.000 02/23/2022 3 +100-7031531-001 05/20/2022 120.00 0.00 MISC 130.92 001.000 04/26/2022 3 +100-3630389-005 05/20/2022 168.00 0.00 MISC 181.86 001.000 04/26/2022 3 
+100-7174941-002 05/20/2022 1,667.38 0.00 MISC 1,804.93 001.000 04/26/2022 9 +100-5204521-002 05/25/2022 3,222.20 0.00 RENT 3,222.20 001.000 04/26/2022 12 +100-7241571-001 05/20/2022 55.00 0.00 MISC 59.54 001.000 04/26/2022 3 +100-7182731-001 04/20/2022 0.00 0.00 RENT 1,025.37 001.000 04/26/2022 3 +100-7182731-001 04/26/2022 0.00 0.00 RENT 1,025.37 001.000 04/26/2022 3 +100-9726258-001 04/25/2022 0.00 0.00 RENT 255.97 001.000 04/26/2022 10 +100-9726258-001 04/26/2022 0.00 0.00 RENT 255.97 001.000 04/26/2022 10 +100-7220301-001 04/20/2022 0.00 0.00 RENT 1,238.00 001.000 04/26/2022 3 +100-7151521-001 05/15/2022 94.00 0.00 MISC 102.46 001.000 04/26/2022 9 +100-7237751-001 05/25/2022 2.00 101.65 MISC 2.14 001.000 04/26/2022 9 +100-3876959-005 03/25/2022 0.00 0.00 RENT 60.74 001.000 09/27/2017 9 +100-3910629-001 03/25/2022 0.00 0.00 RENT 245.81 001.000 03/30/2017 9 +100-3876959-005 04/25/2022 0.00 0.00 RENT 60.74 001.000 09/27/2017 9 +100-3910629-001 04/25/2022 0.00 0.00 RENT 245.81 001.000 03/30/2017 9 +104-4687809-001 04/25/2022 -2,161.94 140.00 MISC 9.80 001.000 08/29/2018 9 +100-3964329-001 04/28/2022 318.13 0.00 MISC 340.40 001.000 03/31/2017 9 +100-3964329-001 03/28/2022 318.13 0.00 MISC 340.40 001.000 03/31/2017 9 +100-1670517-003 04/16/2022 0.00 0.00 RENT 165.00 001.000 09/16/2021 3 +100-4945021-001 05/15/2022 0.00 0.00 RENT 1,357.77 001.000 02/15/2019 3 +100-3694757-001 05/01/2022 298.00 0.00 MISC 324.45 001.000 09/02/2016 9 +100-3694757-001 04/01/2022 298.00 0.00 MISC 324.45 001.000 09/02/2016 9 +100-6651721-002 07/25/2021 0.00 761.00 MISC 53.27 001.000 07/27/2021 9 +100-6814061-001 04/06/2022 0.00 169.00 RENT 15.63 001.000 04/06/2022 3 +100-7170651-001 04/07/2022 0.00 99.00 RENT 9.50 001.000 04/07/2022 3 +100-2446458-002 04/06/2022 865.00 859.26 MISC 66.29 001.000 12/06/2016 9 +100-2446458-002 05/06/2022 865.00 0.00 MISC 925.55 001.000 12/06/2016 9 +102 records listed diff --git a/ILExtract.py b/ILExtract.py index 3eabbf3..9ed525f 100644 --- a/ILExtract.py 
+++ b/ILExtract.py @@ -138,7 +138,7 @@ def ach(report: str, save_name: str): "batch_num": [], "payment_date": [], "lessor": [], - "count": [], + #"count": [], "total": [] } @@ -163,7 +163,8 @@ def ach(report: str, save_name: str): batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3]) # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float batches["total"].append(float(line[1][107:125].strip().replace(",", ""))) - batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", ""))) + #print(f"{line[0]+6} | {lines[line[0]+6][107:125]}\n{lines[line[0]+6]}") + #batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", ""))) # Any time there's a new batch we need to add this data to the dictionary up up to the currrent place # So we iterate over the number of contracts and add in the newest value for each that don't have one of these values already [extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))] @@ -397,54 +398,49 @@ def net_invest_trial_balance(report: str, save_name: str): def lockbox(report: str, save_name: str): lines = report.splitlines() extracted_data_dict = { - "CustomerName" : [], - "PaymentDate" : [], - "InvoiceNumber" : [], - "CheckNumber" : [], - "InvoicePayment" : [], - "ContractNumber" : [], - "ContractPayment" : [], + "SEQ" : [], + "PYMT DATE" : [], + "INV NUM" : [], + "CHECK NUMBER" : [], + "PAYMENT AMOUNT" : [], + "NOTE" : [], + "IL SEQ" : [], + "CONTRACT NUM" : [], + "IL PAYMENT AMOUNT" : [], + "CUST NAME" : [], } - # These are lists of the dictionary columns/keys and the data slots in which - # that data can be found in the report. 
this way we can iterate through them - # While extracting data - bank_payment_records = [list(extracted_data_dict.keys())[1:5],[1,2,3,4]] - infolease_payment_records = [list(extracted_data_dict.keys())[5:],[7,8]] - - # Below are the Regular Exppressions used to find relvant data lines - full_line = "\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1" - contract_only_line = "\s{90}\d.{7}1\d{2}-" - cust_name_line = "\s{98}.{28}\D*" - # The data extractor allows us to extract data from the report using slots - # Slots are ranges of character denote by the list feed into the creation function - data_extractor = create_line_divider([9,19,39,56,69,90,98,118]) + columns = list(extracted_data_dict.keys()) + data_extractor = create_line_divider([9,19,39,56,69,89,98,118]) for line in enumerate(lines): - # We can skip empty lines - if len(line[1]) == 0: continue - # First we should check if there is a full line of data (defined by regex) - if re.search(full_line, line[1]): - # If this is true then we can iterate through the lists we created earlier and append the data to our dict - for k in range(0,len(bank_payment_records[0])): - extracted_data_dict[bank_payment_records[0][k]].append(data_extractor(bank_payment_records[1][k],line[1])) - for k in range(0,len(infolease_payment_records[0])): - extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1])) - # Otherwise we should check if this is a line with only contract data - elif re.search(contract_only_line,line[1]): - # If that's the case we can use the 'bank payment data' from the previous entry since it should apply to his contract - for k in range(0,len(bank_payment_records[0])): - extracted_data_dict[bank_payment_records[0][k]].append(extracted_data_dict[bank_payment_records[0][k]][-1]) - for k in range(0,len(infolease_payment_records[0])): - extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1])) - # If it doesn't hit 
either of these critera then continue since it's irelevant data - else: continue - i = 1 - # used to track how many lines below the current line we're looking for the customer name - # keep moving down a line and checking for a customer name - # Customer name typically happens 1 line under data but can be 13 lines if cut off by page end - while re.search(cust_name_line,lines[line[0]+i]) == None: - i += 1 - # Once it hits, add the name to the dict - extracted_data_dict["CustomerName"].append(data_extractor(7,lines[line[0]+i])) + match = False + # Try to find the first SEQ # & a contract payment date e.i. ' 197 05/10/2022' + if re.match("(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}", line[1]): + match = True + # Add all of the data points except customer name + [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns)-1)] + # Check to see if this line contains only an infolease payment + # Some times there are multiple infolease payments for a single bank record + elif re.search(contract_number_regex, line[1]) != None: + match = True + # If there is then we can add the same data as the previous complete line + [extracted_data_dict[columns[c]].append(extracted_data_dict[columns[c]][-1]) for c in range(0,6)] + # Then add the new data for the infolease contract + [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(6,len(columns)-1)] + # If we had a match we need a customer name to associate with it + # Sometimes these can appear on the next page hense the while loop searching for a match + if match: + # We can tell the cust name will be on the next page if the word "PAGE" appears three lines under the current line + # And the next line is blank + if (lines[line[0]+1].strip() == "") & (lines[line[0]+3].find("PAGE") != -1): + i = 0 + # Look for a bunch of whitespace then some writing + while not re.match("\s{98}.{34}", lines[line[0]+i]): + i +=1 + # Once we find it add the cust name to 
def create_line_divider(breakage_list: list):
    """
    Build a slot extractor for fixed-width report lines.

    ``breakage_list`` holds the character offsets at which a line is split,
    which yields ``len(breakage_list) + 1`` slots.

    Example
        Given breakage_list [10, 20, 30]
        slot 0 -> characters 0 - 10
        slot 1 -> characters 10 - 20
        slot 3 -> characters 30 to the end of the line

    Returns the ``extract_line_slot`` closure bound to ``breakage_list``.
    """
    # Local import so this block does not depend on the file's import header.
    from math import isfinite

    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Return the stripped contents of slot ``slot_num`` of ``line_string``.

        Text that parses as a finite number (thousands separators allowed,
        e.g. ``1,128.00``) is returned as a float. Anything else is returned
        as the stripped text, unchanged.

        Raises ``IndexError`` when ``slot_num`` is not a valid slot number.
        """
        slot_count = len(breakage_list) + 1
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not 0 <= slot_num < slot_count:
            raise IndexError(f"slot_num {slot_num} is outside 0..{slot_count - 1}")
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        text = line_string[low_range:high_range].strip()
        data = text
        try:
            # Commas are removed only for the numeric attempt, so names such
            # as "SMITH, JOHN" keep their punctuation.
            number = float(text.replace(",", ""))
        except ValueError:
            # Not numeric: keep the text.
            pass
        else:
            # float() also accepts "nan" / "inf" / "infinity"; in a report
            # those are words, not amounts, so they stay text.
            if isfinite(number):
                data = number
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot


def minv(report: str, save_name: str, report_date: str = "04/26/2022"):
    """
    Extract the "contracts that were not invoiced" rows from a manual invoice
    report, filter them and save the result to ``save_name`` as an Excel file.

    A row is kept when its rental due is 0 and either its booking date differs
    from ``report_date`` or its outstanding balance is above 100.

    report: full text of the report.
    save_name: path of the Excel workbook to write.
    report_date: booking date (MM/DD/YYYY) treated as "today" by the filter.
        Defaults to the date that was previously hard-coded.

    Returns the filtered dataframe.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([15, 32, 52, 71, 83, 107, 116, 128])
    extracted_data_dict = {
        "ContractNumber": [],
        "UTAB_OIC_DUE": [],
        "RentalDue": [],
        "UTAB_OIC_PYMT": [],
        "ChargeType": [],
        "OutstandBalance": [],
        "BizSegment": [],
        "BookingDate": [],
        "Branch": [],
    }
    columns = list(extracted_data_dict.keys())
    contract_pattern = re.compile(contract_number_regex)
    for text in lines:
        if contract_pattern.search(text) is None:
            continue
        # Column order matches slot order, so slot N feeds column N.
        for slot, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot, text, debug=False))
    # All the list lengths need to be the same so if anything was missed it will fail to build
    dataframe = pd.DataFrame(extracted_data_dict)
    # A slot that failed to parse stays a string; coerce to NaN for the
    # comparisons so one odd row cannot raise TypeError on "> 100".
    rental_due = pd.to_numeric(dataframe["RentalDue"], errors="coerce")
    outstanding = pd.to_numeric(dataframe["OutstandBalance"], errors="coerce")
    # (bookdate != report date & rent = 0) OR (outstanding > 100 & rent = 0)
    keep = (rental_due == 0) & ((dataframe["BookingDate"] != report_date) | (outstanding > 100))
    filtered = dataframe[keep]
    filtered.to_excel(save_name, index=False)
    return filtered
def create_line_divider(breakage_list: list):
    """
    Build a slot extractor for fixed-width report lines.

    ``breakage_list`` holds the character offsets at which a line is split,
    which yields ``len(breakage_list) + 1`` slots.

    Example
        Given breakage_list [10, 20, 30]
        slot 0 -> characters 0 - 10
        slot 1 -> characters 10 - 20
        slot 3 -> characters 30 to the end of the line

    Returns the ``extract_line_slot`` closure bound to ``breakage_list``.
    """
    # Local import so this block does not depend on the file's import header.
    from math import isfinite

    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Return the stripped contents of slot ``slot_num`` of ``line_string``.

        Text that parses as a finite number (thousands separators allowed,
        e.g. ``1,128.00``) is returned as a float. Anything else is returned
        as the stripped text, unchanged.

        Raises ``IndexError`` when ``slot_num`` is not a valid slot number.
        """
        slot_count = len(breakage_list) + 1
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not 0 <= slot_num < slot_count:
            raise IndexError(f"slot_num {slot_num} is outside 0..{slot_count - 1}")
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        text = line_string[low_range:high_range].strip()
        data = text
        try:
            # Commas are removed only for the numeric attempt, so names such
            # as "SMITH, JOHN" keep their punctuation.
            number = float(text.replace(",", ""))
        except ValueError:
            # Not numeric: keep the text.
            pass
        else:
            # float() also accepts "nan" / "inf" / "infinity"; in a report
            # those are words, not amounts, so they stay text.
            if isfinite(number):
                data = number
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot


def ach(report: str, save_name: str):
    """
    Extract ACH payment lines and per-batch totals from an ACH report.

    Writes an Excel workbook to ``save_name`` with two sheets: "data" (one row
    per contract payment) and "Summary" (one row per batch).

    report: full text of the report.
    save_name: path of the Excel workbook to write.

    Returns the "data" dataframe.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber": [],
        "CustomerName": [],
        "BankCode": [],
        "BankNumber": [],
        "AccountNumber": [],
        "Payment": [],
        "Batch": [],
        "Lessor": [],
        "PaymentDate": [],
    }
    columns = list(extracted_data_dict.keys())
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "total": [],
    }
    # The last three columns come from the batch total line, not the payment
    # line. The rest are read slot by slot, so key order must match slot order.
    line_columns = columns[:-3]
    batch_columns = {"Batch": "batch_num", "Lessor": "lessor", "PaymentDate": "payment_date"}

    data_extractor = create_line_divider([19, 57, 67, 82, 104])
    contract_pattern = re.compile(contract_number_regex)
    bank_number_pattern = re.compile(r"\d{9}")
    batch_pattern = re.compile(r"BATCH \d{4} TOTAL")

    for index, text in enumerate(lines):
        # A payment line carries both a contract number and a bank number
        if contract_pattern.search(text) and bank_number_pattern.search(text):
            for slot, column in enumerate(line_columns):
                extracted_data_dict[column].append(data_extractor(slot, text))
        # A batch total line closes the batch the preceding contracts belong to.
        # Contracts are summed by lessor code, a feature requested by cash apps
        if batch_pattern.search(text):
            # Batch number is always in characters 96 to 101
            batches["batch_num"].append(text[96:101])
            # Payment date is 2 lines below, between characters 114 and 125;
            # empty when the report ends before that line
            date_line = lines[index + 2] if index + 2 < len(lines) else ""
            batches["payment_date"].append(date_line[114:125])
            # Lessor is the first three characters of the latest contract
            # number; empty when no contract has been read yet
            contracts = extracted_data_dict["ContractNumber"]
            batches["lessor"].append(contracts[-1][0:3] if contracts else "")
            # Total is given by the report; ',' is removed so it parses as a float
            batches["total"].append(float(text[107:125].strip().replace(",", "")))
            # Stamp the batch values onto every contract read since the
            # previous batch total line
            contract_count = len(extracted_data_dict["BankCode"])
            for column, batch_key in batch_columns.items():
                missing = contract_count - len(extracted_data_dict[column])
                extracted_data_dict[column].extend([batches[batch_key][-1]] * missing)
    # The lists should now be equal lengths; pandas raises if contracts follow
    # the last batch total line
    dataframe = pd.DataFrame(extracted_data_dict)
    # Two sheets (data & Summary) so an Excel writer is needed
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary")
    return dataframe
def lockbox(report: str, save_name: str):
    """
    Extract lockbox payment records from a report (older column layout) and
    save them to ``save_name`` as an Excel file.

    A "full" line carries the bank payment fields plus one contract payment.
    A "contract only" line carries a further contract payment for the bank
    payment above it. The customer name is taken from the first following
    line that matches the customer-name pattern.

    report: full text of the report.
    save_name: path of the Excel workbook to write.

    Returns the extracted dataframe.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "CustomerName": [],
        "PaymentDate": [],
        "InvoiceNumber": [],
        "CheckNumber": [],
        "InvoicePayment": [],
        "ContractNumber": [],
        "ContractPayment": [],
    }
    # Column name -> extractor slot that holds the value on a report line
    bank_payment_slots = {"PaymentDate": 1, "InvoiceNumber": 2, "CheckNumber": 3, "InvoicePayment": 4}
    infolease_payment_slots = {"ContractNumber": 7, "ContractPayment": 8}

    # Regular expressions used to find relevant data lines
    full_line = re.compile(r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1")
    contract_only_line = re.compile(r"\s{90}\d.{7}1\d{2}-")
    cust_name_line = re.compile(r"\s{98}.{28}\D*")
    data_extractor = create_line_divider([9, 19, 39, 56, 69, 90, 98, 118])

    for index, text in enumerate(lines):
        # Empty lines carry no data
        if not text:
            continue
        if full_line.search(text):
            for column, slot in bank_payment_slots.items():
                extracted_data_dict[column].append(data_extractor(slot, text))
        elif contract_only_line.search(text):
            # Nothing to inherit from when no full line has been seen yet
            if not extracted_data_dict["PaymentDate"]:
                continue
            # The bank payment data of the previous entry applies to this contract
            for column in bank_payment_slots:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
        else:
            continue
        for column, slot in infolease_payment_slots.items():
            extracted_data_dict[column].append(data_extractor(slot, text))
        # The customer name is usually 1 line under the data but can be much
        # further down when cut off by a page end. Stop at the end of the
        # report and leave the name empty when none is found.
        customer_name = ""
        for candidate in range(index + 1, len(lines)):
            if cust_name_line.search(lines[candidate]):
                customer_name = data_extractor(7, lines[candidate])
                break
        extracted_data_dict["CustomerName"].append(customer_name)
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe


def lb2(report: str, save_name: str):
    """
    Extract lockbox payment records from a report (current column layout) and
    save them to ``save_name`` as an Excel file.

    A line starting with a SEQ number and payment date fills every column
    except the customer name. A line that only holds a contract number
    re-uses the first six columns of the previous record. The customer name
    is read from the next line, or from the first name-only line further down
    when a page break follows.

    report: full text of the report.
    save_name: path of the Excel workbook to write.

    Returns the extracted dataframe.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "SEQ": [],
        "PYMT DATE": [],
        "INV NUM": [],
        "CHECK NUMBER": [],
        "PAYMENT AMOUNT": [],
        "NOTE": [],
        "IL SEQ": [],
        "CONTRACT NUM": [],
        "IL PAYMENT AMOUNT": [],
        "CUST NAME": [],
    }
    columns = list(extracted_data_dict.keys())
    # Every column except the customer name is read by slot number
    slot_columns = columns[:-1]
    data_extractor = create_line_divider([9, 19, 39, 56, 69, 89, 98, 118])
    # SEQ # followed by a payment date, e.g. ' 197     05/10/2022'
    full_line = re.compile(r"(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}")
    contract_line = re.compile(contract_number_regex)
    # A bunch of whitespace then some writing
    name_line = re.compile(r"\s{98}.{34}")

    def line_at(position: int) -> str:
        """Return line ``position``, or "" when it is past the end of the report."""
        return lines[position] if position < len(lines) else ""

    for index, text in enumerate(lines):
        if full_line.match(text):
            for slot, column in enumerate(slot_columns):
                extracted_data_dict[column].append(data_extractor(slot, text, debug=False))
        elif contract_line.search(text) is not None:
            # Sometimes there are multiple infolease payments for one bank
            # record; nothing to inherit from before the first full line
            if not extracted_data_dict["SEQ"]:
                continue
            for column in columns[:6]:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
            for slot in range(6, len(slot_columns)):
                extracted_data_dict[columns[slot]].append(data_extractor(slot, text, debug=False))
        else:
            continue
        # The name is on the next page when the next line is blank and "PAGE"
        # appears three lines under the current line
        if line_at(index + 1).strip() == "" and "PAGE" in line_at(index + 3):
            customer_name = ""
            for candidate in range(index, len(lines)):
                if name_line.match(lines[candidate]):
                    # The name is the only thing on that line
                    customer_name = lines[candidate].strip()
                    break
        else:
            # Otherwise the name is on the next line (even if that line is blank)
            customer_name = line_at(index + 1).strip()
        extracted_data_dict["CUST NAME"].append(customer_name)
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
ifile: + report = ifile.read() + +lb2(report, "test_lb_0510.xlsx") + +with open(r2, errors="replace") as ifile: + report = ifile.read() +lb2(report, "test_lb_0511.xlsx") \ No newline at end of file diff --git a/main.py b/main.py index bd84bd8..e28425e 100644 --- a/main.py +++ b/main.py @@ -22,7 +22,6 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): self.outputFileButton.clicked.connect(self.setOutput) self.processReportButton.clicked.connect(self.process_selection) self.openReportButton.clicked.connect(self.to_clipboard) - def getfile(self): inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file') diff --git a/mainWindow_new.py b/mainWindow_new.py index 1b9394d..df7eb06 100644 --- a/mainWindow_new.py +++ b/mainWindow_new.py @@ -21,7 +21,7 @@ class Ui_MainWindow(object): icon2 = QtGui.QIcon() icon2.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off) iconCopy = QtGui.QIcon() - iconCopy.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off) + iconCopy.addPixmap(QtGui.QPixmap("copy.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off) iconProcess = QtGui.QIcon() iconProcess.addPixmap(QtGui.QPixmap("process.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off) MainWindow.setWindowIcon(icon) diff --git a/man_inv_test.xlsx b/man_inv_test.xlsx new file mode 100644 index 0000000..9871c60 Binary files /dev/null and b/man_inv_test.xlsx differ diff --git a/test_ach_0613.xlsx b/test_ach_0613.xlsx new file mode 100644 index 0000000..486d727 Binary files /dev/null and b/test_ach_0613.xlsx differ diff --git a/test_lb_0510.xlsx b/test_lb_0510.xlsx new file mode 100644 index 0000000..4064cc4 Binary files /dev/null and b/test_lb_0510.xlsx differ diff --git a/test_lb_0511.xlsx b/test_lb_0511.xlsx new file mode 100644 index 0000000..1d6dc06 Binary files /dev/null and b/test_lb_0511.xlsx differ diff --git a/test_lb_0613.xlsx b/test_lb_0613.xlsx new file mode 100644 index 0000000..0b7dc61 Binary files /dev/null and b/test_lb_0613.xlsx differ