Fixed issue with ACH (batch count error), added pbp invoice # regex, rewrote lockbox

4 years ago · d01fdde480
parent df96574a98
commit d01fdde480
12 changed files with 578 additions and 51 deletions
--- a/2022.05.04_MINV_C
+++ b/2022.05.04_MINV_C
@ -0,0 +1,112 @@
+
+
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         DAILY.MANUAL.INVOICE                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    CONTRACTS THAT WERE NOT INVOICED                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
+ PAGE 04-26-22  1
+
+                                                                        CHRG                               BUSINESS
+CONTRACT.NO........ UATB.OIC.DUE RENTAL DUE......... UATB.IDS.OIC.PAYME TYPEM..... OUTSTANDING BALANCE.... SEGMENT. BOOKING.DATE BRANCH
+ 
+100-2453558-003       05/09/2022 183.71                            0.00 MISC                        201.16  001.000   03/09/2018      9
+100-2453558-003       04/09/2022 183.71                          180.67 MISC                         20.49  001.000   03/09/2018      9
+100-2453558-003       03/09/2022 183.71                          183.71 MISC                         17.45  001.000   03/09/2018      9
+100-1665517-003       05/15/2022 412.97                            0.00 MISC                        438.78  001.000   10/31/2014      9
+100-4850431-001       05/10/2022 411.80                            0.00 MISC                        441.21  001.000   12/10/2018      3
+100-4462739-001       04/18/2022 157.08                            0.00 RENT                        157.08  001.000   06/18/2018      9
+100-4850431-001       04/10/2022 411.80                            0.00 MISC                        441.21  001.000   12/10/2018      3
+100-3500858-001       05/12/2022 262.37                            0.00 MISC                        279.42  001.000   04/13/2016      9
+100-3725849-003       05/13/2022 559.32                            0.00 MISC                        612.45  001.000   10/19/2017      9
+100-3500858-001       04/12/2022 262.37                            0.00 MISC                        279.42  001.000   04/13/2016      9
+047-2580598-001       04/05/2022 0.00                              0.00 MISC                         72.53  001.000   03/06/2014      9
+100-3725849-003       03/13/2022 559.32                          559.32 MISC                         53.13  001.000   10/19/2017      9
+100-4566489-001       04/14/2022 0.00                            354.04 MISC                          2.25  001.000   06/14/2018      9
+100-4566489-001       05/14/2022 0.00                            328.00 MISC                         28.29  001.000   06/14/2018      9
+100-5382471-001       04/15/2022 1,128.00                          0.00 MISC                      1,228.11  001.000   10/09/2019      9
+100-5382471-001       05/15/2022 1,128.00                          0.00 MISC                      1,228.11  001.000   10/09/2019      9
+100-9723689-001       04/20/2022 0.00                              0.00 RENT                        571.58  001.000   04/20/2022     10
+100-9723689-001       04/20/2022 0.00                              0.00 RENT                        571.58  001.000   04/20/2022     10
+100-7219911-001       04/20/2022 0.00                              0.00 RENT                        813.08  001.000   04/20/2022      9
+100-1354567-002       05/25/2022 170.00                            0.00 RENT                        170.00  001.000   05/31/2016      9
+100-6651721-001       07/14/2021 0.00                            761.00 MISC                         53.27  001.000   07/14/2021      9
+100-2081987-008       05/25/2022 407.15                            0.00 MISC                        439.73  001.000   10/02/2017      9
+100-2139037-002       05/20/2022 105.00                            0.00 MISC                        111.67  001.000   03/20/2017      9
+100-3725849-003       04/13/2022 559.32                          440.68 MISC                        171.77  001.000   10/19/2017      9
+100-3344078-002       05/01/2022 -28.53                           18.47 RENT                         63.00  001.000   10/01/2020      9
+100-2081987-008       03/25/2022 407.15                            0.00 MISC                        439.73  001.000   10/02/2017      9
+100-1354567-002       04/25/2022 170.00                            0.00 RENT                        170.00  001.000   05/31/2016      9
+100-2081987-008       04/25/2022 407.15                            0.00 MISC                        439.73  001.000   10/02/2017      9
+100-2081987-008       02/25/2022 407.15                          407.15 MISC                         32.58  001.000   10/02/2017      9
+100-3876959-007       04/21/2022 61.07                             0.00 RENT                         61.07  001.000   06/21/2018      9
+100-1637209-005       05/20/2022 2,023.20                          0.00 RENT                      2,023.20  001.000   04/25/2022      9
+100-7146771-001       04/20/2022 183.28                          167.00 RENT                         16.28  001.000   04/25/2022      3
+100-7146771-001       05/20/2022 183.28                            0.00 RENT                        183.28  001.000   04/25/2022      3
+100-7045691-001       05/20/2022 244.57                            0.00 RENT                        244.57  001.000   04/25/2022      3
+100-7059671-001       05/20/2022 60.00                             0.00 MISC                         64.20  001.000   04/25/2022      3
+100-7237601-001       04/20/2022 0.00                              0.00 RENT                     34,192.91  001.000   04/25/2022      3
+100-7242461-001       05/20/2022 57.00                             0.00 MISC                         60.99  001.000   04/25/2022      9
+100-7178461-001       05/20/2022 197.45                            0.00 MISC                        209.30  001.000   04/25/2022      3
+100-2611389-007       05/20/2022 171.76                            0.00 RENT                        171.76  001.000   04/25/2022      3
+100-7037791-001       05/01/2022 444.00                            0.00 MISC                        478.41  001.000   04/25/2022      9
+100-7203371-001       05/20/2022 1,566.40                          0.00 RENT                      1,566.40  001.000   04/25/2022      3
+100-6630017-005       05/01/2022 0.00                            178.55 MISC                          0.01  001.000   04/25/2022      3
+100-6738611-001       04/20/2022 0.00                              0.00 RENT                      4,545.94  001.000   04/25/2022      3
+100-6738611-001       04/25/2022 0.00                              0.00 RENT                      4,545.94  001.000   04/25/2022      3
+100-7052571-001       05/14/2022 255.87                            0.00 MISC                        278.90  001.000   04/25/2022      9
+100-1011756-004       05/20/2022 1,001.64                          0.00 MISC                      1,081.77  001.000   04/25/2022      9
+100-6849836-001       05/20/2022 1,077.47                          0.00 RENT                      1,077.47  001.000   04/25/2022      3
+100-3492758-003       05/15/2022 312.41                            0.00 RENT                        312.41  001.000   04/25/2022      9
+100-7156851-001       05/20/2022 150.00                            0.00 MISC                        159.00  001.000   04/25/2022      3
+100-7232561-001       05/20/2022 113.60                            0.00 MISC                        122.12  001.000   04/25/2022      9
+100-3876959-007       05/21/2022 61.07                             0.00 RENT                         61.07  001.000   06/21/2018      9
+100-5382931-003       05/20/2022 146.69                            0.00 RENT                        146.69  001.000   04/26/2022      3
+100-5722341-003       05/20/2022 170.00                            0.00 MISC                        181.90  001.000   04/26/2022      3
+100-7150721-001       04/20/2022 174.96                            0.00 RENT                        174.96  001.000   04/26/2022      3
+100-7150721-001       05/20/2022 174.96                            0.00 RENT                        174.96  001.000   04/26/2022      3
+100-7165521-001       05/20/2022 1,417.88                          0.00 RENT                      1,417.88  001.000   04/26/2022      3
+100-7227921-001       05/20/2022 64.00                             0.00 MISC                         69.28  001.000   04/26/2022      3
+100-4858739-002       05/15/2022 208.00                            0.00 MISC                        225.16  001.000   04/26/2022      3
+100-7100621-001       05/13/2022 880.10                            0.00 MISC                        954.90  001.000   04/26/2022      9
+100-9725556-001       04/25/2022 0.00                              0.00 RENT                        600.77  001.000   04/26/2022     10
+100-9725556-001       04/26/2022 0.00                              0.00 RENT                        600.77  001.000   04/26/2022     10
+100-7209051-001       05/20/2022 1,652.01                          0.00 RENT                      1,652.01  001.000   04/26/2022      3
+100-9660710-001       05/09/2022 174.75                            0.00 RENT                        174.75  001.000   06/09/2021     10
+100-5329301-002       04/20/2022 0.00                              0.00 RENT                        263.44  001.000   04/26/2022      3
+100-7087121-001       05/16/2022 3,294.46                      1,125.58 RENT                      3,294.46  001.000   02/16/2022     12
+100-6602681-003       04/25/2022 0.00                              0.00 RENT                        478.00  001.000   04/26/2022      3
+100-6602681-003       04/25/2022 0.00                              0.00 RENT                        478.00  001.000   04/26/2022      3
+100-6754131-001       05/20/2022 747.75                            0.00 RENT                        747.75  001.000   04/26/2022      3
+100-7214111-001       05/21/2022 542.97                            0.00 RENT                        542.97  001.000   04/26/2022      9
+101-6898811-001       04/20/2022 0.00                              0.00 RENT                     15,035.55  001.000   04/26/2022      3
+100-2406418-003       05/20/2022 200.00                            0.00 MISC                        219.00  001.000   04/26/2022      9
+100-6943901-002       05/16/2022 236.40                            0.00 MISC                        257.67  001.000   04/26/2022      9
+100-1623380-901       05/15/2022 1,769.11                          0.00 RENT                      1,769.11  001.000   04/26/2022     10
+100-7107941-001       05/20/2022 1,038.95                          0.00 RENT                      1,038.95  001.000   02/23/2022      3
+100-7031531-001       05/20/2022 120.00                            0.00 MISC                        130.92  001.000   04/26/2022      3
+100-3630389-005       05/20/2022 168.00                            0.00 MISC                        181.86  001.000   04/26/2022      3
+100-7174941-002       05/20/2022 1,667.38                          0.00 MISC                      1,804.93  001.000   04/26/2022      9
+100-5204521-002       05/25/2022 3,222.20                          0.00 RENT                      3,222.20  001.000   04/26/2022     12
+100-7241571-001       05/20/2022 55.00                             0.00 MISC                         59.54  001.000   04/26/2022      3
+100-7182731-001       04/20/2022 0.00                              0.00 RENT                      1,025.37  001.000   04/26/2022      3
+100-7182731-001       04/26/2022 0.00                              0.00 RENT                      1,025.37  001.000   04/26/2022      3
+100-9726258-001       04/25/2022 0.00                              0.00 RENT                        255.97  001.000   04/26/2022     10
+100-9726258-001       04/26/2022 0.00                              0.00 RENT                        255.97  001.000   04/26/2022     10
+100-7220301-001       04/20/2022 0.00                              0.00 RENT                      1,238.00  001.000   04/26/2022      3
+100-7151521-001       05/15/2022 94.00                             0.00 MISC                        102.46  001.000   04/26/2022      9
+100-7237751-001       05/25/2022 2.00                            101.65 MISC                          2.14  001.000   04/26/2022      9
+100-3876959-005       03/25/2022 0.00                              0.00 RENT                         60.74  001.000   09/27/2017      9
+100-3910629-001       03/25/2022 0.00                              0.00 RENT                        245.81  001.000   03/30/2017      9
+100-3876959-005       04/25/2022 0.00                              0.00 RENT                         60.74  001.000   09/27/2017      9
+100-3910629-001       04/25/2022 0.00                              0.00 RENT                        245.81  001.000   03/30/2017      9
+104-4687809-001       04/25/2022 -2,161.94                       140.00 MISC                          9.80  001.000   08/29/2018      9
+100-3964329-001       04/28/2022 318.13                            0.00 MISC                        340.40  001.000   03/31/2017      9
+100-3964329-001       03/28/2022 318.13                            0.00 MISC                        340.40  001.000   03/31/2017      9
+100-1670517-003       04/16/2022 0.00                              0.00 RENT                        165.00  001.000   09/16/2021      3
+100-4945021-001       05/15/2022 0.00                              0.00 RENT                      1,357.77  001.000   02/15/2019      3
+100-3694757-001       05/01/2022 298.00                            0.00 MISC                        324.45  001.000   09/02/2016      9
+100-3694757-001       04/01/2022 298.00                            0.00 MISC                        324.45  001.000   09/02/2016      9
+100-6651721-002       07/25/2021 0.00                            761.00 MISC                         53.27  001.000   07/27/2021      9
+100-6814061-001       04/06/2022 0.00                            169.00 RENT                         15.63  001.000   04/06/2022      3
+100-7170651-001       04/07/2022 0.00                             99.00 RENT                          9.50  001.000   04/07/2022      3
+100-2446458-002       04/06/2022 865.00                          859.26 MISC                         66.29  001.000   12/06/2016      9
+100-2446458-002       05/06/2022 865.00                            0.00 MISC                        925.55  001.000   12/06/2016      9
+102 records listed
--- a/ILExtract.py
+++ b/ILExtract.py
@ -138,7 +138,7 @@ def ach(report: str, save_name: str):
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
-        "count": [],
+        #"count": [],
        "total": []
    }

@ -163,7 +163,8 @@ def ach(report: str, save_name: str):
            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
            # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float
            batches["total"].append(float(line[1][107:125].strip().replace(",", "")))
-            batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
+            #print(f"{line[0]+6} | {lines[line[0]+6][107:125]}\n{lines[line[0]+6]}")
+            #batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
            # Any time there's a new batch we need to add this data to the dictionary up up to the currrent place
            # So we iterate over the number of contracts and add in the newest value for each that don't have one of these values already
            [extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))]
@ -397,54 +398,49 @@ def net_invest_trial_balance(report: str, save_name: str):
 def lockbox(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
-        "CustomerName" : [],
-        "PaymentDate" : [],
-        "InvoiceNumber" : [],
-        "CheckNumber" : [],
-        "InvoicePayment" : [],
-        "ContractNumber" : [],
-        "ContractPayment" : [],
+        "SEQ" : [],
+        "PYMT DATE" : [],
+        "INV NUM" : [],
+        "CHECK NUMBER" : [],
+        "PAYMENT AMOUNT" : [],
+        "NOTE" : [],
+        "IL SEQ" : [],
+        "CONTRACT NUM" : [],
+        "IL PAYMENT AMOUNT" : [],
+        "CUST NAME" : [],
    }
-    # These are lists of the dictionary columns/keys and the data slots in which
-    # that data can be found in the report. this way we can iterate through them
-    # While extracting data
-    bank_payment_records = [list(extracted_data_dict.keys())[1:5],[1,2,3,4]]
-    infolease_payment_records = [list(extracted_data_dict.keys())[5:],[7,8]]
-
-    # Below are the Regular Exppressions used to find relvant data lines
-    full_line = "\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1"
-    contract_only_line = "\s{90}\d.{7}1\d{2}-"
-    cust_name_line = "\s{98}.{28}\D*"
-    # The data extractor allows us to extract data from the report using slots
-    # Slots are ranges of character denote by the list feed into the creation function
-    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
+    columns = list(extracted_data_dict.keys())
+    data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
    for line in enumerate(lines):
-        # We can skip empty lines
-        if len(line[1]) == 0: continue
-        # First we should check if there is a full line of data (defined by regex)
-        if re.search(full_line, line[1]):
-            # If this is true then we can iterate through the lists we created earlier and append the data to our dict
-            for k in range(0,len(bank_payment_records[0])):
-                extracted_data_dict[bank_payment_records[0][k]].append(data_extractor(bank_payment_records[1][k],line[1]))
-            for k in range(0,len(infolease_payment_records[0])):
-                extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1]))
-        # Otherwise we should check if this is a line with only contract data
-        elif re.search(contract_only_line,line[1]):
-            # If that's the case we can use the 'bank payment data' from the previous entry since it should apply to his contract
-            for k in range(0,len(bank_payment_records[0])):
-                extracted_data_dict[bank_payment_records[0][k]].append(extracted_data_dict[bank_payment_records[0][k]][-1])
-            for k in range(0,len(infolease_payment_records[0])):
-                extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1]))
-        # If it doesn't hit either of these critera then continue since it's irelevant data
-        else: continue
-        i = 1 
-        # used to track how many lines below the current line we're looking for the customer name
-        # keep moving down a line and checking for a customer name
-        # Customer name typically happens 1 line under data but can be 13 lines if cut off by page end
-        while re.search(cust_name_line,lines[line[0]+i]) == None:
+        match = False
+        # Try to find the first SEQ # & a contract payment date, e.g. ' 197     05/10/2022'
+        if re.match("(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}", line[1]):
+            match = True
+            # Add all of the data points except customer name
+            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns)-1)]
+        # Check to see if this line contains only an infolease payment
+        # Some times there are multiple infolease payments for a single bank record
+        elif re.search(contract_number_regex, line[1]) != None:
+            match = True
+            # If there is then we can add the same data as the previous complete line
+            [extracted_data_dict[columns[c]].append(extracted_data_dict[columns[c]][-1]) for c in range(0,6)]
+            # Then add the new data for the infolease contract
+            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(6,len(columns)-1)]
+        # If we had a match we need a customer name to associate with it
+        # Sometimes these can appear on the next page, hence the while loop searching for a match
+        if match:
+            # We can tell the cust name will be on the next page if the word "PAGE" appears three lines under the current line
+            # And the next line is blank
+            if (lines[line[0]+1].strip() == "") & (lines[line[0]+3].find("PAGE") != -1):
+                i = 0
+                # Look for a bunch of whitespace then some writing
+                while not re.match("\s{98}.{34}", lines[line[0]+i]):
                    i +=1
-        # Once it hits, add the name to the dict
-        extracted_data_dict["CustomerName"].append(data_extractor(7,lines[line[0]+i]))
+                # Once we find it add the cust name to the dict (it's the only thing on the line)
+                extracted_data_dict["CUST NAME"].append(lines[line[0]+i].strip())
+            # If the condition above isn't met then the cust name is on the next line (even if that line is blank)
+            else:
+                extracted_data_dict["CUST NAME"].append(lines[line[0]+1].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
@ -496,7 +492,7 @@ def payment_transactions(report: str, save_name: str):
    for line in enumerate(lines):
        slot1 = data_extractor(1,line[1],False)
        if type(slot1) != str : continue
-        if re.search(contract_number_regex, slot1) != None:
+        if (re.search(contract_number_regex, slot1) or re.search("\d{3}\.\d{4}\.\d{4}", slot1))!= None:
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0,len(columns)-3)]
            tnum_match = re.search(transaction_num_regex, lines[line[0]+1])
            if tnum_match:
--- a/ManualInvoice.py
+++ b/ManualInvoice.py
@ -0,0 +1,142 @@
+import os
+import pandas as pd
+from datetime import datetime as dt, timedelta
+import re
+from pathlib import Path
+import time
+import numpy as np
+from pprint import pprint as prt
+
+
+def pfd(df: pd.DataFrame):
+    """
+    Prints the whole dataframe with no limit on the number of rows or
+    columns displayed. Debugging helper; returns nothing.
+    """
+    # option_context only overrides the display options inside the with-block
+    with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
+        print(df)
+
+
+def create_line_divider(breakage_list: list):
+    """
+    Builds a custom data extractor for fixed-width report lines.
+    breakage_list defines the character positions at which a line is split.
+    A list of N break points yields N+1 slots (numbered 0 to N).
+    Example
+    Given breakage_list [10, 20, 30]
+    slot_num 0 in the resulting extract_line_slot yields line[0:10],
+    slot 1 yields line[10:20], slot 2 yields line[20:30]
+    and slot 3 yields everything from character 30 to the end of the line.
+    Returns the extract_line_slot function bound to this breakage_list.
+    """
+    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
+        """
+        Pulls data from a line/string using the break points defined by the
+        parent function.
+        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
+        The slot text is stripped of surrounding whitespace and of commas.
+        If the result parses as a number it is returned as a float,
+        otherwise it is returned as a string (possibly empty).
+        When debug is True the slot bounds and the value are printed.
+        """
+        # We can't have a slot number higher than the number of slots
+        # NOTE(review): assert statements are skipped when python runs with -O
+        assert(slot_num < len(breakage_list)+1)
+        # Slot 0 starts at the beginning of the line, every other slot starts at the previous break point
+        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
+        # The last slot runs to the end of the line, every other slot ends at its own break point
+        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
+        # In order to create a float we need to remove the , from the string
+        data = line_string[low_range:high_range].strip().replace(",", "")
+        # Anything that can't be converted (dates, contract numbers, blanks) stays a string
+        # NOTE(review): the bare except hides every error, not only the failed conversion
+        try: data = float(data)
+        except: pass
+        if debug:
+            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
+        return data
+    return extract_line_slot
+
+
+def minv(report: str, save_name: str):
+    """
+    Extracts the contract rows from a manual invoice report, filters them
+    and writes the filtered rows to an excel file.
+    report: full text of the report
+    save_name: path of the excel file to write
+    Returns the filtered dataframe (the same rows that were written).
+    Relies on the module level 'contract_number_regex' being defined before
+    this function is called.
+    """
+    lines = report.splitlines()
+    # 8 break points -> 9 slots, one per column of extracted_data_dict (in the same order)
+    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
+    extracted_data_dict = {
+        "ContractNumber" : [],
+        "UTAB_OIC_DUE" : [],
+        "RentalDue" : [],
+        "UTAB_OIC_PYMT" : [],
+        "ChargeType" : [],
+        "OutstandBalance" : [],
+        "BizSegment" : [],
+        "BookingDate" : [],
+        "Branch" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    for line in enumerate(lines):
+        # Only lines containing a contract number are data rows, everything else (headers, blanks, footer) is skipped
+        if re.search(contract_number_regex, line[1]) != None:
+            # Slot c of the line goes into column c of the dictionary
+            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns))]
+    # All the list lengths need to be the same so if anything was missed it will fail to build
+    dataframe = pd.DataFrame(extracted_data_dict)
+    # Keep rows matching: ( bookdate != today & rent = 0 ) OR (outstanding > 100 & rent = 0)
+    # NOTE(review): "today" is hard-coded as '04/26/2022' below, which looks like the run date
+    # of the sample report - confirm. To compute it, the format would need to be
+    # dt.today().strftime("%m/%d/%Y"); the "%m/%m/%Y" sketched originally repeats the month.
+    filtered = dataframe[
+        ((dataframe["BookingDate"] != '04/26/2022') & (dataframe["RentalDue"] == 0)) |\
+        ((dataframe["RentalDue"] == 0 )             & (dataframe["OutstandBalance"] > 100))]
+    filtered.to_excel(save_name, index=False)
+    return filtered
+
+# Contract numbers that the output of minv() is compared against below
+# NOTE(review): presumably the result of the existing manual process - confirm the source
+current_output = [
+    '100-1011756-004',
+    '100-1354567-002',
+    '100-1637209-005',
+    '100-1665517-003',
+    '100-1670517-003',
+    '100-2081987-008',
+    '100-2139037-002',
+    '100-2446458-002',
+    '100-2453558-003',
+    '100-2611389-007',
+    '100-3492758-003',
+    '100-3500858-001',
+    '100-3694757-001',
+    '100-3725849-003',
+    '100-3876959-007',
+    '100-3910629-001',
+    '100-3964329-001',
+    '100-4462739-001',
+    '100-4850431-001',
+    '100-4945021-001',
+    '100-5382471-001',
+    '100-6738611-001',
+    '100-6849836-001',
+    '100-7037791-001',
+    '100-7045691-001',
+    '100-7052571-001',
+    '100-7059671-001',
+    '100-7087121-001',
+    '100-7107941-001',
+    '100-7146771-001',
+    '100-7156851-001',
+    '100-7178461-001',
+    '100-7203371-001',
+    '100-7219911-001',
+    '100-7232561-001',
+    '100-7237601-001',
+    '100-7242461-001',
+    '100-9660710-001',
+    '100-9723689-001',
+]
+
+# Matches contract numbers of the form NNN-NNNNNNN-NNN (3 digits, 7 digits, 3 digits)
+# Used by minv(), so it has to be defined before minv() is called below
+contract_number_regex = "\d{3}-\d{7}-\d{3}"
+
+# errors="replace" swaps undecodable bytes for a placeholder instead of raising
+with open("2022.05.04_MINV_C", errors="replace") as ifile:
+    report = ifile.read()
+
+# Run the extraction (this also writes man_inv_test.xlsx) and dump the result
+fin_df = minv(report, "man_inv_test.xlsx")
+pfd(fin_df)
+il_contracts = fin_df.ContractNumber.to_list()
+prt(il_contracts) 
+
+# Contracts minv() produced that are not in current_output, and the other way around
+extra_contracts = []
+not_included = []
+for c in il_contracts:
+    if c not in current_output:
+        extra_contracts.append(c)
+for c in current_output:
+    if c not in il_contracts:
+        not_included.append(c)
+
+print("\nExtra Contracts:")
+prt(extra_contracts)
+print("Not Included Contracts:")
+prt(not_included)
+# This is a plain list comparison, so order and repeated contract numbers both count
+print(f"MATCHING CONTRACTS: {il_contracts == current_output}")
+print(f"Current # contract {len(current_output)} | ILE Processed Contracts: {len(il_contracts)}")
+print(f"# Extra contracts included: {len(extra_contracts)} | # Contracts not included: {len(not_included)}")
--- a/ach_fix.py
+++ b/ach_fix.py
@ -0,0 +1,110 @@
+import os
+import pandas as pd
+from datetime import datetime as dt, timedelta
+import sys, getopt
+import re
+from pathlib import Path
+import time
+import numpy as np
+
+
+# Matches contract numbers of the form NNN-NNNNNNN-NNN (3 digits, 7 digits, 3 digits)
+contract_number_regex = "\d{3}-\d{7}-\d{3}"
+
+
+def create_line_divider(breakage_list: list):
+    """
+    Builds a custom data extractor for fixed-width report lines.
+    breakage_list defines the character positions at which a line is split.
+    A list of N break points yields N+1 slots (numbered 0 to N).
+    Example
+    Given breakage_list [10, 20, 30]
+    slot_num 0 in the resulting extract_line_slot yields line[0:10],
+    slot 1 yields line[10:20], slot 2 yields line[20:30]
+    and slot 3 yields everything from character 30 to the end of the line.
+    Returns the extract_line_slot function bound to this breakage_list.
+    """
+    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
+        """
+        Pulls data from a line/string using the break points defined by the
+        parent function.
+        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
+        The slot text is stripped of surrounding whitespace and of commas.
+        If the result parses as a number it is returned as a float,
+        otherwise it is returned as a string (possibly empty).
+        When debug is True the slot bounds and the value are printed.
+        """
+        # We can't have a slot number higher than the number of slots
+        # NOTE(review): assert statements are skipped when python runs with -O
+        assert(slot_num < len(breakage_list)+1)
+        # Slot 0 starts at the beginning of the line, every other slot starts at the previous break point
+        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
+        # The last slot runs to the end of the line, every other slot ends at its own break point
+        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
+        # In order to create a float we need to remove the , from the string
+        data = line_string[low_range:high_range].strip().replace(",", "")
+        # Anything that can't be converted (dates, contract numbers, blanks) stays a string
+        # NOTE(review): the bare except hides every error, not only the failed conversion
+        try: data = float(data)
+        except: pass
+        if debug:
+            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
+        return data
+    return extract_line_slot
+
+def ach(report: str, save_name: str):
+    """
+    Extracts the contract payment rows and the batch totals from an ACH
+    report and writes them to an excel workbook with two sheets:
+    "data" (one row per contract payment) and "Summary" (one row per batch).
+    report: full text of the report
+    save_name: path of the excel workbook to write
+    Returns the dataframe that was written to the "data" sheet.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+    "ContractNumber" : [],
+    "CustomerName" : [],
+    "BankCode" : [],
+    "BankNumber": [],
+    "AccountNumber" : [],
+    "Payment" : [],
+    "Batch": [],
+    "Lessor": [],
+    "PaymentDate": [],
+    }
+    columns = list(extracted_data_dict.keys())
+    # One entry per batch total line found in the report, becomes the "Summary" sheet
+    batches = {
+        "batch_num": [],
+        "payment_date": [],
+        "lessor": [],
+        #"count": [],
+        "total": []
+    }
+
+    # 5 break points -> 6 slots, matching the first six columns of extracted_data_dict
+    data_extractor = create_line_divider([19,57,67,82,104])
+    bank_number_regex = "\d{9}"
+    batch_num_regex = "BATCH \d{4} TOTAL"
+    for line in enumerate(lines):
+        # Check for a contract number and a bank number in the line
+        if (re.search(contract_number_regex, line[1]) != None) & (re.search(bank_number_regex, line[1]) != None):
+            # Iterates through the columns list and adds the corresponding slot number to the dictionary for the column
+            # Here the order of the columns (keys in dictionary) matters since they need to be in the same order as
+            # the slot numbers. The last three columns (Batch, Lessor, PaymentDate) are filled in from the batch line below
+            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns)-3)]
+        # This searches for a statement that looks like a batch number
+        # This sums the contracts by their lessor code. A feature requested by cash apps
+        if re.search(batch_num_regex, line[1]) != None:
+            # Batch number is always in characters 96 to 101
+            batches["batch_num"].append(line[1][96:101])
+            # Payment date will be 2 lines below that between characters 114 and 125
+            # NOTE(review): raises IndexError if the batch line is one of the last two lines of the report
+            batches["payment_date"].append(lines[line[0]+2][114:125])
+            # Lessor is just the first three numbers of the most recently extracted contract number
+            # NOTE(review): raises IndexError if a batch line appears before any contract row
+            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
+            # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float
+            batches["total"].append(float(line[1][107:125].strip().replace(",", "")))
+            #print(f"{line[0]+6} | {lines[line[0]+6][107:125]}\n{lines[line[0]+6]}")
+            #batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
+            # Any time there's a new batch we need to add this data to the dictionary up to the current place
+            # So we iterate over the number of contracts and add in the newest value for each that don't have one of these values already
+            [extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))]
+            [extracted_data_dict["Lessor"].append(batches["lessor"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Lessor"])))]
+            [extracted_data_dict["PaymentDate"].append(batches["payment_date"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["PaymentDate"])))]
+    # Now the dictionary lists should all be equal lengths and we can create a dataframe
+    # (contract rows that come after the last batch line never get batch data, so the build fails in that case)
+    dataframe = pd.DataFrame(extracted_data_dict)
+    # We're creating two sheets: data & summary so we need to open an excel writer
+    # This also helps with a bug caused by larger dataframes
+    with pd.ExcelWriter(save_name) as writer:  
+        dataframe.to_excel(writer, index=False, sheet_name="data")
+        # The batches dictionary is converted to a dataframe and added as its own sheet
+        pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary")
+    return dataframe
+
+# Sample ACH reports used while testing the fix. Only r2 is read below; r1 is kept for switching by hand
+r1 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.05.27_ACH_C"
+r2 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.06.03_ACH_C"
+
+# errors="replace" swaps undecodable bytes for a placeholder instead of raising
+with open(r2, errors="replace") as ifile:
+    report = ifile.read()
+
+# Writes test_ach_0613.xlsx; the returned dataframe is not used
+ach(report, "test_ach_0613.xlsx")
--- a/lbf.py
+++ b/lbf.py
@ -0,0 +1,168 @@
+import os
+import pandas as pd
+from datetime import datetime as dt, timedelta
+import sys, getopt
+import re
+from pathlib import Path
+import time
+import numpy as np
+from pprint import pprint as prt
+
+
+# Pattern for a contract number: 3 digits, 7 digits, 3 digits separated by '-'.
+# Written as a raw string so the regex escapes are not parsed as (invalid)
+# string escapes; the resulting pattern text is unchanged.
+contract_number_regex = r"\d{3}-\d{7}-\d{3}"
+
+def dict_lens(dictionary):
+    """Print every key of ``dictionary`` next to the length of its value."""
+    for key, values in dictionary.items():
+        print(f"{key} : {len(values)}")
+
+
+
+def create_line_divider(breakage_list: list):
+    """
+    This allows for the creation of a custom data extractor
+    Breakage list defines the split points that will be used for the line
+    Example
+    Given breakage_list [10, 20, 30]
+    using slot_num 0 in the resulting extract_line_slot will yield
+    characters 0 - 10 from the string.
+    Slot 1 would give characters 10 - 20
+    The last slot (slot 3 here) gives everything from character 30 to the
+    end of the line.
+
+    Returns the extract_line_slot function bound to breakage_list.
+    """
+    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
+        """
+        Pulls data from a line/string using break points defined by the
+        parent function.
+        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
+
+        slot_num: index of the slot to read (0 .. len(breakage_list))
+        line_string: the report line to slice
+        debug: when True, print the slot boundaries and the extracted value
+
+        Returns the slot text with surrounding whitespace and ',' removed.
+        Any text that float() accepts is returned as a float instead of a
+        string; everything else is returned unchanged as a string.
+        """
+        # We can't have a slot number higher than the number of slots
+        assert(slot_num < len(breakage_list)+1)
+        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
+        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
+        # In order to create a float we need to remove the , from the string
+        data = line_string[low_range:high_range].strip().replace(",", "")
+        try:
+            data = float(data)
+        except ValueError:
+            # Not numeric text: keep it as a string. Only ValueError is caught
+            # so that KeyboardInterrupt/SystemExit are never swallowed here.
+            pass
+        if debug:
+            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
+        return data
+    return extract_line_slot
+
+def lockbox(report: str, save_name: str):
+    """
+    Extract payment records from a lockbox report and write them to Excel.
+
+    report: full text of the report
+    save_name: path of the Excel file to write
+
+    Returns the extracted rows as a pandas DataFrame (also written to
+    save_name without the index).
+
+    Raises IndexError when a contract-only line appears before any full data
+    line, or when no customer name line can be found after a data line.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+        "CustomerName" : [],
+        "PaymentDate" : [],
+        "InvoiceNumber" : [],
+        "CheckNumber" : [],
+        "InvoicePayment" : [],
+        "ContractNumber" : [],
+        "ContractPayment" : [],
+    }
+    # Column name -> data slot in which that value is found on a report line
+    bank_payment_slots = {
+        "PaymentDate" : 1,
+        "InvoiceNumber" : 2,
+        "CheckNumber" : 3,
+        "InvoicePayment" : 4,
+    }
+    infolease_payment_slots = {
+        "ContractNumber" : 7,
+        "ContractPayment" : 8,
+    }
+
+    # Below are the Regular Expressions used to find relevant data lines
+    # (raw strings so the regex escapes are not parsed as string escapes)
+    full_line = re.compile(r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1")
+    contract_only_line = re.compile(r"\s{90}\d.{7}1\d{2}-")
+    cust_name_line = re.compile(r"\s{98}.{28}\D*")
+    # The data extractor allows us to extract data from the report using slots
+    # Slots are ranges of characters denoted by the list fed into the creation function
+    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
+    for index, text in enumerate(lines):
+        # We can skip empty lines
+        if not text:
+            continue
+        if full_line.search(text):
+            # A full line carries its own bank payment data
+            for column, slot in bank_payment_slots.items():
+                extracted_data_dict[column].append(data_extractor(slot, text))
+        elif contract_only_line.search(text):
+            # A contract-only line reuses the bank payment data of the previous entry
+            for column in bank_payment_slots:
+                extracted_data_dict[column].append(extracted_data_dict[column][-1])
+        else:
+            # Neither kind of data line: irrelevant to the extract
+            continue
+        # Both kinds of data line carry the contract data in the same slots
+        for column, slot in infolease_payment_slots.items():
+            extracted_data_dict[column].append(data_extractor(slot, text))
+        # Customer name typically happens 1 line under data but can be further
+        # down if cut off by a page end, so keep moving down until one is found
+        name_index = index + 1
+        while name_index < len(lines) and cust_name_line.search(lines[name_index]) is None:
+            name_index += 1
+        if name_index >= len(lines):
+            # Same exception type as running off the end of the list, but with context
+            raise IndexError(f"no customer name line found after report line {index + 1}")
+        # The name is read through slot 7 (characters 98 - 118) of the name line
+        extracted_data_dict["CustomerName"].append(data_extractor(7, lines[name_index]))
+    dataframe = pd.DataFrame(extracted_data_dict)
+    dataframe.to_excel(save_name, index=False)
+    return dataframe
+
+
+def lb2(report:str, save_name:str):
+    """
+    Extract payment records from a lockbox report and write them to Excel.
+
+    report: full text of the report
+    save_name: path of the Excel file to write
+
+    A line that starts with a SEQ number and a payment date fills every
+    column except CUST NAME from its own slots. A line that only contains a
+    contract number repeats the first six columns of the previous row and
+    reads the remaining contract columns from its own slots.
+
+    The customer name is taken from the line under the record, or from the
+    first name-only line at or after the record when the record is followed
+    by a page break. If the report ends before a name is found the name is
+    recorded as an empty string.
+
+    Returns the extracted rows as a pandas DataFrame (also written to
+    save_name without the index).
+
+    Raises IndexError when a contract-only line appears before any full line.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+        "SEQ" : [],
+        "PYMT DATE" : [],
+        "INV NUM" : [],
+        "CHECK NUMBER" : [],
+        "PAYMENT AMOUNT" : [],
+        "NOTE" : [],
+        "IL SEQ" : [],
+        "CONTRACT NUM" : [],
+        "IL PAYMENT AMOUNT" : [],
+        "CUST NAME" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    # Every column except the last (CUST NAME) maps to the slot of the same index
+    slot_columns = columns[:-1]
+    data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
+    # SEQ # followed by a payment date e.g. ' 197     05/10/2022'
+    full_record = re.compile(r"(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}")
+    contract_record = re.compile(contract_number_regex)
+    # A bunch of whitespace then some writing
+    name_only_line = re.compile(r"\s{98}.{34}")
+    line_count = len(lines)
+
+    def line_at(position):
+        # Lines past the end of the report are treated as blank
+        return lines[position] if position < line_count else ""
+
+    for index, text in enumerate(lines):
+        if full_record.match(text):
+            # Add all of the data points except customer name
+            for slot, column in enumerate(slot_columns):
+                extracted_data_dict[column].append(data_extractor(slot, text, debug=False))
+        # Sometimes there are multiple infolease payments for a single bank record
+        elif contract_record.search(text) is not None:
+            # Repeat the bank data of the previous complete line
+            for column in columns[:6]:
+                extracted_data_dict[column].append(extracted_data_dict[column][-1])
+            # Then add the new data for the infolease contract
+            for slot in range(6, len(slot_columns)):
+                extracted_data_dict[slot_columns[slot]].append(data_extractor(slot, text, debug=False))
+        else:
+            continue
+        # Every matched record needs a customer name to associate with it.
+        # The name will be on the next page if the next line is blank and the
+        # word "PAGE" appears three lines under the current line. 'and' plus
+        # line_at() keep this check from indexing past the end of the report.
+        if line_at(index + 1).strip() == "" and "PAGE" in line_at(index + 3):
+            cursor = index
+            while cursor < line_count and not name_only_line.match(lines[cursor]):
+                cursor += 1
+            # The name is the only thing on that line; blank if none was found
+            extracted_data_dict["CUST NAME"].append(line_at(cursor).strip())
+        # Otherwise the cust name is on the next line (even if that line is blank)
+        else:
+            extracted_data_dict["CUST NAME"].append(line_at(index + 1).strip())
+    dataframe = pd.DataFrame(extracted_data_dict)
+    dataframe.to_excel(save_name, index=False)
+    return dataframe
+
+
+r1 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.10_LOCKBOX_094_C"
+r2 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.11_LOCKBOX_094_C"
+
+# Run each sample report through lb2, in order, each into its own workbook
+for report_path, workbook_name in ((r1, "test_lb_0510.xlsx"), (r2, "test_lb_0511.xlsx")):
+    with open(report_path, errors="replace") as ifile:
+        report = ifile.read()
+    lb2(report, workbook_name)
--- a/main.py
+++ b/main.py
@ -23,7 +23,6 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
        self.processReportButton.clicked.connect(self.process_selection)
        self.openReportButton.clicked.connect(self.to_clipboard)

-
    def getfile(self):
        inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file') 
        self.inputFileLE.setText(inFile[0])
--- a/mainWindow_new.py
+++ b/mainWindow_new.py
@ -21,7 +21,7 @@ class Ui_MainWindow(object):
        icon2 = QtGui.QIcon()
        icon2.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        iconCopy = QtGui.QIcon()
-        iconCopy.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
+        iconCopy.addPixmap(QtGui.QPixmap("copy.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        iconProcess = QtGui.QIcon()
        iconProcess.addPixmap(QtGui.QPixmap("process.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        MainWindow.setWindowIcon(icon)
--- a/man_inv_test.xlsx
+++ b/man_inv_test.xlsx
--- a/test_ach_0613.xlsx
+++ b/test_ach_0613.xlsx
--- a/test_lb_0510.xlsx
+++ b/test_lb_0510.xlsx
--- a/test_lb_0511.xlsx
+++ b/test_lb_0511.xlsx
--- a/test_lb_0613.xlsx
+++ b/test_lb_0613.xlsx