Fixed issue with ACH (batch count error), added pbp invoice # regex, rewrote lockbox

4 years ago · d01fdde480
parent df96574a98
commit d01fdde480
12 changed files with 578 additions and 51 deletions
--- a/2022.05.04_MINV_C
+++ b/2022.05.04_MINV_C
@ -0,0 +1,112 @@
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         DAILY.MANUAL.INVOICE                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    CONTRACTS THAT WERE NOT INVOICED                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
 PAGE 04-26-22  1
                                                                        CHRG                               BUSINESS
 CONTRACT.NO........ UATB.OIC.DUE RENTAL DUE......... UATB.IDS.OIC.PAYME TYPEM..... OUTSTANDING BALANCE.... SEGMENT. BOOKING.DATE BRANCH
 100-2453558-003       05/09/2022 183.71                            0.00 MISC                        201.16  001.000   03/09/2018      9
 100-2453558-003       04/09/2022 183.71                          180.67 MISC                         20.49  001.000   03/09/2018      9
 100-2453558-003       03/09/2022 183.71                          183.71 MISC                         17.45  001.000   03/09/2018      9
 100-1665517-003       05/15/2022 412.97                            0.00 MISC                        438.78  001.000   10/31/2014      9
 100-4850431-001       05/10/2022 411.80                            0.00 MISC                        441.21  001.000   12/10/2018      3
 100-4462739-001       04/18/2022 157.08                            0.00 RENT                        157.08  001.000   06/18/2018      9
 100-4850431-001       04/10/2022 411.80                            0.00 MISC                        441.21  001.000   12/10/2018      3
 100-3500858-001       05/12/2022 262.37                            0.00 MISC                        279.42  001.000   04/13/2016      9
 100-3725849-003       05/13/2022 559.32                            0.00 MISC                        612.45  001.000   10/19/2017      9
 100-3500858-001       04/12/2022 262.37                            0.00 MISC                        279.42  001.000   04/13/2016      9
 047-2580598-001       04/05/2022 0.00                              0.00 MISC                         72.53  001.000   03/06/2014      9
 100-3725849-003       03/13/2022 559.32                          559.32 MISC                         53.13  001.000   10/19/2017      9
 100-4566489-001       04/14/2022 0.00                            354.04 MISC                          2.25  001.000   06/14/2018      9
 100-4566489-001       05/14/2022 0.00                            328.00 MISC                         28.29  001.000   06/14/2018      9
 100-5382471-001       04/15/2022 1,128.00                          0.00 MISC                      1,228.11  001.000   10/09/2019      9
 100-5382471-001       05/15/2022 1,128.00                          0.00 MISC                      1,228.11  001.000   10/09/2019      9
 100-9723689-001       04/20/2022 0.00                              0.00 RENT                        571.58  001.000   04/20/2022     10
 100-9723689-001       04/20/2022 0.00                              0.00 RENT                        571.58  001.000   04/20/2022     10
 100-7219911-001       04/20/2022 0.00                              0.00 RENT                        813.08  001.000   04/20/2022      9
 100-1354567-002       05/25/2022 170.00                            0.00 RENT                        170.00  001.000   05/31/2016      9
 100-6651721-001       07/14/2021 0.00                            761.00 MISC                         53.27  001.000   07/14/2021      9
 100-2081987-008       05/25/2022 407.15                            0.00 MISC                        439.73  001.000   10/02/2017      9
 100-2139037-002       05/20/2022 105.00                            0.00 MISC                        111.67  001.000   03/20/2017      9
 100-3725849-003       04/13/2022 559.32                          440.68 MISC                        171.77  001.000   10/19/2017      9
 100-3344078-002       05/01/2022 -28.53                           18.47 RENT                         63.00  001.000   10/01/2020      9
 100-2081987-008       03/25/2022 407.15                            0.00 MISC                        439.73  001.000   10/02/2017      9
 100-1354567-002       04/25/2022 170.00                            0.00 RENT                        170.00  001.000   05/31/2016      9
 100-2081987-008       04/25/2022 407.15                            0.00 MISC                        439.73  001.000   10/02/2017      9
 100-2081987-008       02/25/2022 407.15                          407.15 MISC                         32.58  001.000   10/02/2017      9
 100-3876959-007       04/21/2022 61.07                             0.00 RENT                         61.07  001.000   06/21/2018      9
 100-1637209-005       05/20/2022 2,023.20                          0.00 RENT                      2,023.20  001.000   04/25/2022      9
 100-7146771-001       04/20/2022 183.28                          167.00 RENT                         16.28  001.000   04/25/2022      3
 100-7146771-001       05/20/2022 183.28                            0.00 RENT                        183.28  001.000   04/25/2022      3
 100-7045691-001       05/20/2022 244.57                            0.00 RENT                        244.57  001.000   04/25/2022      3
 100-7059671-001       05/20/2022 60.00                             0.00 MISC                         64.20  001.000   04/25/2022      3
 100-7237601-001       04/20/2022 0.00                              0.00 RENT                     34,192.91  001.000   04/25/2022      3
 100-7242461-001       05/20/2022 57.00                             0.00 MISC                         60.99  001.000   04/25/2022      9
 100-7178461-001       05/20/2022 197.45                            0.00 MISC                        209.30  001.000   04/25/2022      3
 100-2611389-007       05/20/2022 171.76                            0.00 RENT                        171.76  001.000   04/25/2022      3
 100-7037791-001       05/01/2022 444.00                            0.00 MISC                        478.41  001.000   04/25/2022      9
 100-7203371-001       05/20/2022 1,566.40                          0.00 RENT                      1,566.40  001.000   04/25/2022      3
 100-6630017-005       05/01/2022 0.00                            178.55 MISC                          0.01  001.000   04/25/2022      3
 100-6738611-001       04/20/2022 0.00                              0.00 RENT                      4,545.94  001.000   04/25/2022      3
 100-6738611-001       04/25/2022 0.00                              0.00 RENT                      4,545.94  001.000   04/25/2022      3
 100-7052571-001       05/14/2022 255.87                            0.00 MISC                        278.90  001.000   04/25/2022      9
 100-1011756-004       05/20/2022 1,001.64                          0.00 MISC                      1,081.77  001.000   04/25/2022      9
 100-6849836-001       05/20/2022 1,077.47                          0.00 RENT                      1,077.47  001.000   04/25/2022      3
 100-3492758-003       05/15/2022 312.41                            0.00 RENT                        312.41  001.000   04/25/2022      9
 100-7156851-001       05/20/2022 150.00                            0.00 MISC                        159.00  001.000   04/25/2022      3
 100-7232561-001       05/20/2022 113.60                            0.00 MISC                        122.12  001.000   04/25/2022      9
 100-3876959-007       05/21/2022 61.07                             0.00 RENT                         61.07  001.000   06/21/2018      9
 100-5382931-003       05/20/2022 146.69                            0.00 RENT                        146.69  001.000   04/26/2022      3
 100-5722341-003       05/20/2022 170.00                            0.00 MISC                        181.90  001.000   04/26/2022      3
 100-7150721-001       04/20/2022 174.96                            0.00 RENT                        174.96  001.000   04/26/2022      3
 100-7150721-001       05/20/2022 174.96                            0.00 RENT                        174.96  001.000   04/26/2022      3
 100-7165521-001       05/20/2022 1,417.88                          0.00 RENT                      1,417.88  001.000   04/26/2022      3
 100-7227921-001       05/20/2022 64.00                             0.00 MISC                         69.28  001.000   04/26/2022      3
 100-4858739-002       05/15/2022 208.00                            0.00 MISC                        225.16  001.000   04/26/2022      3
 100-7100621-001       05/13/2022 880.10                            0.00 MISC                        954.90  001.000   04/26/2022      9
 100-9725556-001       04/25/2022 0.00                              0.00 RENT                        600.77  001.000   04/26/2022     10
 100-9725556-001       04/26/2022 0.00                              0.00 RENT                        600.77  001.000   04/26/2022     10
 100-7209051-001       05/20/2022 1,652.01                          0.00 RENT                      1,652.01  001.000   04/26/2022      3
 100-9660710-001       05/09/2022 174.75                            0.00 RENT                        174.75  001.000   06/09/2021     10
 100-5329301-002       04/20/2022 0.00                              0.00 RENT                        263.44  001.000   04/26/2022      3
 100-7087121-001       05/16/2022 3,294.46                      1,125.58 RENT                      3,294.46  001.000   02/16/2022     12
 100-6602681-003       04/25/2022 0.00                              0.00 RENT                        478.00  001.000   04/26/2022      3
 100-6602681-003       04/25/2022 0.00                              0.00 RENT                        478.00  001.000   04/26/2022      3
 100-6754131-001       05/20/2022 747.75                            0.00 RENT                        747.75  001.000   04/26/2022      3
 100-7214111-001       05/21/2022 542.97                            0.00 RENT                        542.97  001.000   04/26/2022      9
 101-6898811-001       04/20/2022 0.00                              0.00 RENT                     15,035.55  001.000   04/26/2022      3
 100-2406418-003       05/20/2022 200.00                            0.00 MISC                        219.00  001.000   04/26/2022      9
 100-6943901-002       05/16/2022 236.40                            0.00 MISC                        257.67  001.000   04/26/2022      9
 100-1623380-901       05/15/2022 1,769.11                          0.00 RENT                      1,769.11  001.000   04/26/2022     10
 100-7107941-001       05/20/2022 1,038.95                          0.00 RENT                      1,038.95  001.000   02/23/2022      3
 100-7031531-001       05/20/2022 120.00                            0.00 MISC                        130.92  001.000   04/26/2022      3
 100-3630389-005       05/20/2022 168.00                            0.00 MISC                        181.86  001.000   04/26/2022      3
 100-7174941-002       05/20/2022 1,667.38                          0.00 MISC                      1,804.93  001.000   04/26/2022      9
 100-5204521-002       05/25/2022 3,222.20                          0.00 RENT                      3,222.20  001.000   04/26/2022     12
 100-7241571-001       05/20/2022 55.00                             0.00 MISC                         59.54  001.000   04/26/2022      3
 100-7182731-001       04/20/2022 0.00                              0.00 RENT                      1,025.37  001.000   04/26/2022      3
 100-7182731-001       04/26/2022 0.00                              0.00 RENT                      1,025.37  001.000   04/26/2022      3
 100-9726258-001       04/25/2022 0.00                              0.00 RENT                        255.97  001.000   04/26/2022     10
 100-9726258-001       04/26/2022 0.00                              0.00 RENT                        255.97  001.000   04/26/2022     10
 100-7220301-001       04/20/2022 0.00                              0.00 RENT                      1,238.00  001.000   04/26/2022      3
 100-7151521-001       05/15/2022 94.00                             0.00 MISC                        102.46  001.000   04/26/2022      9
 100-7237751-001       05/25/2022 2.00                            101.65 MISC                          2.14  001.000   04/26/2022      9
 100-3876959-005       03/25/2022 0.00                              0.00 RENT                         60.74  001.000   09/27/2017      9
 100-3910629-001       03/25/2022 0.00                              0.00 RENT                        245.81  001.000   03/30/2017      9
 100-3876959-005       04/25/2022 0.00                              0.00 RENT                         60.74  001.000   09/27/2017      9
 100-3910629-001       04/25/2022 0.00                              0.00 RENT                        245.81  001.000   03/30/2017      9
 104-4687809-001       04/25/2022 -2,161.94                       140.00 MISC                          9.80  001.000   08/29/2018      9
 100-3964329-001       04/28/2022 318.13                            0.00 MISC                        340.40  001.000   03/31/2017      9
 100-3964329-001       03/28/2022 318.13                            0.00 MISC                        340.40  001.000   03/31/2017      9
 100-1670517-003       04/16/2022 0.00                              0.00 RENT                        165.00  001.000   09/16/2021      3
 100-4945021-001       05/15/2022 0.00                              0.00 RENT                      1,357.77  001.000   02/15/2019      3
 100-3694757-001       05/01/2022 298.00                            0.00 MISC                        324.45  001.000   09/02/2016      9
 100-3694757-001       04/01/2022 298.00                            0.00 MISC                        324.45  001.000   09/02/2016      9
 100-6651721-002       07/25/2021 0.00                            761.00 MISC                         53.27  001.000   07/27/2021      9
 100-6814061-001       04/06/2022 0.00                            169.00 RENT                         15.63  001.000   04/06/2022      3
 100-7170651-001       04/07/2022 0.00                             99.00 RENT                          9.50  001.000   04/07/2022      3
 100-2446458-002       04/06/2022 865.00                          859.26 MISC                         66.29  001.000   12/06/2016      9
 100-2446458-002       05/06/2022 865.00                            0.00 MISC                        925.55  001.000   12/06/2016      9
 102 records listed
--- a/ILExtract.py
+++ b/ILExtract.py
@ -138,7 +138,7 @@ def ach(report: str, save_name: str):
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
-        "count": [],
+        #"count": [],
        "total": []
    }
@ -163,7 +163,8 @@ def ach(report: str, save_name: str):
            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
            # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float
            batches["total"].append(float(line[1][107:125].strip().replace(",", "")))
-            batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
+            #print(f"{line[0]+6} | {lines[line[0]+6][107:125]}\n{lines[line[0]+6]}")
            #batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", "")))
            # Any time there's a new batch we need to add this data to the dictionary up up to the currrent place
            # So we iterate over the number of contracts and add in the newest value for each that don't have one of these values already
            [extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))]
@ -397,54 +398,49 @@ def net_invest_trial_balance(report: str, save_name: str):
 def lockbox(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
-        "CustomerName" : [],
+        "SEQ" : [],
-        "PaymentDate" : [],
+        "PYMT DATE" : [],
-        "InvoiceNumber" : [],
+        "INV NUM" : [],
-        "CheckNumber" : [],
+        "CHECK NUMBER" : [],
-        "InvoicePayment" : [],
+        "PAYMENT AMOUNT" : [],
-        "ContractNumber" : [],
+        "NOTE" : [],
-        "ContractPayment" : [],
+        "IL SEQ" : [],
        "CONTRACT NUM" : [],
        "IL PAYMENT AMOUNT" : [],
        "CUST NAME" : [],
    }
-    # These are lists of the dictionary columns/keys and the data slots in which
+    columns = list(extracted_data_dict.keys())
-    # that data can be found in the report. this way we can iterate through them
+    data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
    # While extracting data
    bank_payment_records = [list(extracted_data_dict.keys())[1:5],[1,2,3,4]]
    infolease_payment_records = [list(extracted_data_dict.keys())[5:],[7,8]]
    # Below are the Regular Exppressions used to find relvant data lines
    full_line = "\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1"
    contract_only_line = "\s{90}\d.{7}1\d{2}-"
    cust_name_line = "\s{98}.{28}\D*"
    # The data extractor allows us to extract data from the report using slots
    # Slots are ranges of character denote by the list feed into the creation function
    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
    for line in enumerate(lines):
-        # We can skip empty lines
+        match = False
-        if len(line[1]) == 0: continue
+        # Try to find the first SEQ # & a contract payment date e.i. ' 197     05/10/2022'
-        # First we should check if there is a full line of data (defined by regex)
+        if re.match("(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}", line[1]):
-        if re.search(full_line, line[1]):
+            match = True
-            # If this is true then we can iterate through the lists we created earlier and append the data to our dict
+            # Add all of the data points except customer name
-            for k in range(0,len(bank_payment_records[0])):
+            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns)-1)]
-                extracted_data_dict[bank_payment_records[0][k]].append(data_extractor(bank_payment_records[1][k],line[1]))
+        # Check to see if this line contains only an infolease payment
-            for k in range(0,len(infolease_payment_records[0])):
+        # Some times there are multiple infolease payments for a single bank record
-                extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1]))
+        elif re.search(contract_number_regex, line[1]) != None:
-        # Otherwise we should check if this is a line with only contract data
+            match = True
-        elif re.search(contract_only_line,line[1]):
+            # If there is then we can add the same data as the previous complete line
-            # If that's the case we can use the 'bank payment data' from the previous entry since it should apply to his contract
+            [extracted_data_dict[columns[c]].append(extracted_data_dict[columns[c]][-1]) for c in range(0,6)]
-            for k in range(0,len(bank_payment_records[0])):
+            # Then add the new data for the infolease contract
-                extracted_data_dict[bank_payment_records[0][k]].append(extracted_data_dict[bank_payment_records[0][k]][-1])
+            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(6,len(columns)-1)]
-            for k in range(0,len(infolease_payment_records[0])):
+        # If we had a match we need a customer name to associate with it
-                extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k],line[1]))
+        # Sometimes these can appear on the next page hense the while loop searching for a match
-        # If it doesn't hit either of these critera then continue since it's irelevant data
+        if match:
-        else: continue
+            # We can tell the cust name will be on the next page if the word "PAGE" appears three lines under the current line
-        i = 1 
+            # And the next line is blank
-        # used to track how many lines below the current line we're looking for the customer name
+            if (lines[line[0]+1].strip() == "") & (lines[line[0]+3].find("PAGE") != -1):
-        # keep moving down a line and checking for a customer name
+                i = 0
-        # Customer name typically happens 1 line under data but can be 13 lines if cut off by page end
+                # Look for a bunch of whitespace then some writing
-        while re.search(cust_name_line,lines[line[0]+i]) == None:
+                while not re.match("\s{98}.{34}", lines[line[0]+i]):
-            i += 1
+                    i +=1
-        # Once it hits, add the name to the dict
+                # Once we find it add the cust name to the dict (it's the only thing on the line)
-        extracted_data_dict["CustomerName"].append(data_extractor(7,lines[line[0]+i]))
+                extracted_data_dict["CUST NAME"].append(lines[line[0]+i].strip())
            # if the condition above isnt met then the cust name is on the next line (even if that line is blank)
            else:
                extracted_data_dict["CUST NAME"].append(lines[line[0]+1].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
@ -496,7 +492,7 @@ def payment_transactions(report: str, save_name: str):
    for line in enumerate(lines):
        slot1 = data_extractor(1,line[1],False)
        if type(slot1) != str : continue
-        if re.search(contract_number_regex, slot1) != None:
+        if (re.search(contract_number_regex, slot1) or re.search("\d{3}\.\d{4}\.\d{4}", slot1))!= None:
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0,len(columns)-3)]
            tnum_match = re.search(transaction_num_regex, lines[line[0]+1])
            if tnum_match:
--- a/ManualInvoice.py
+++ b/ManualInvoice.py
@ -0,0 +1,142 @@
 import os
 import pandas as pd
 from datetime import datetime as dt, timedelta
 import re
 from pathlib import Path
 import time
 import numpy as np
 from pprint import pprint as prt
 def pfd(df: pd.DataFrame):
    """Print df in full, with pandas' row and column display limits lifted."""
    # The limits are only lifted inside the context; they revert afterwards
    unlimited_display = pd.option_context('display.max_rows', None, 'display.max_columns', None)
    with unlimited_display:
        print(df)
 def create_line_divider(breakage_list: list):
    """
    Build a custom data extractor for fixed-width report lines.

    breakage_list defines the character offsets at which a line is split.
    Example
    Given breakage_list [10, 20, 30]
        slot 0 yields characters 0 - 10 of the string
        slot 1 yields characters 10 - 20
        slot 3 (the last slot) yields character 30 to the end of the line

    Returns the extract_line_slot(slot_num, line_string, debug=False) function.
    """
    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
        """
        Pulls data from a line/string using the break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slot text is stripped and every ',' is removed. If what remains
        parses as a float the float is returned, otherwise the cleaned string
        (an empty slot therefore comes back as "").
        When debug is True the slot bounds and the extracted value are printed.
        """
        # We can't have a slot number higher than the number of slots
        assert slot_num < len(breakage_list)+1, "slot_num is past the last slot"
        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        # Text that is not a number stays a string. Only the failed parse is
        # ignored so that unrelated errors (e.g. KeyboardInterrupt) still surface
        try:
            data = float(data)
        except ValueError:
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
 def minv(report: str, save_name: str, report_date: str = '04/26/2022'):
    """
    Extract the contract lines of a manual invoice report and save the
    contracts that pass the filter below to an Excel file.

    Every line containing a contract number (contract_number_regex) is split
    into nine fixed-width slots, one per column of the resulting table.
    A row is kept when RentalDue is 0 AND
        (BookingDate is not report_date OR OutstandBalance is above 100)

    report      : full text of the report
    save_name   : path of the Excel file the filtered rows are written to
    report_date : booking date, written MM/DD/YYYY as in the report, that the
                  filter treats as "today". Defaults to the date that used to
                  be hard-coded so existing calls behave exactly as before;
                  pass e.g. dt.today().strftime("%m/%d/%Y") for a live run.

    Returns the filtered DataFrame.
    """
    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
    # The order of the keys matters: key N is filled from slot N of the line
    extracted_data_dict = {
        "ContractNumber" : [],
        "UTAB_OIC_DUE" : [],
        "RentalDue" : [],
        "UTAB_OIC_PYMT" : [],
        "ChargeType" : [],
        "OutstandBalance" : [],
        "BizSegment" : [],
        "BookingDate" : [],
        "Branch" : [],
    }
    columns = list(extracted_data_dict.keys())
    for text in report.splitlines():
        # Headers, blank lines and the record count have no contract number
        if re.search(contract_number_regex, text) is None:
            continue
        for slot, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot, text, debug=False))
    #All the list lengths need to be the same so if anything was missed it will fail to build
    dataframe = pd.DataFrame(extracted_data_dict)
    # ( bookdate != today & rent = 0 ) OR (outstanding > 100 & rent = 0)
    booked_before_today = dataframe["BookingDate"] != report_date
    no_rental_due = dataframe["RentalDue"] == 0
    large_balance = dataframe["OutstandBalance"] > 100
    filtered = dataframe[no_rental_due & (booked_before_today | large_balance)]
    filtered.to_excel(save_name, index=False)
    return filtered
 # Reference list of contract numbers that the minv() result is compared
 # against further down in this script.
 # NOTE(review): presumably the contracts produced by the existing manual
 # process for the same report - confirm the source of this list.
 current_output = [
    '100-1011756-004',
    '100-1354567-002',
    '100-1637209-005',
    '100-1665517-003',
    '100-1670517-003',
    '100-2081987-008',
    '100-2139037-002',
    '100-2446458-002',
    '100-2453558-003',
    '100-2611389-007',
    '100-3492758-003',
    '100-3500858-001',
    '100-3694757-001',
    '100-3725849-003',
    '100-3876959-007',
    '100-3910629-001',
    '100-3964329-001',
    '100-4462739-001',
    '100-4850431-001',
    '100-4945021-001',
    '100-5382471-001',
    '100-6738611-001',
    '100-6849836-001',
    '100-7037791-001',
    '100-7045691-001',
    '100-7052571-001',
    '100-7059671-001',
    '100-7087121-001',
    '100-7107941-001',
    '100-7146771-001',
    '100-7156851-001',
    '100-7178461-001',
    '100-7203371-001',
    '100-7219911-001',
    '100-7232561-001',
    '100-7237601-001',
    '100-7242461-001',
    '100-9660710-001',
    '100-9723689-001',
 ]
 # Matches contract numbers of the form NNN-NNNNNNN-NNN (e.g. 100-2453558-003).
 # Raw string: the pattern is unchanged, but "\d" is no longer parsed as an
 # (invalid) string escape, which newer Python versions warn about.
 contract_number_regex = r"\d{3}-\d{7}-\d{3}"
 # Run minv() on the sample report, then report how its contract list differs
 # from the reference list in current_output.
 report = Path("2022.05.04_MINV_C").read_text(errors="replace")
 fin_df = minv(report, "man_inv_test.xlsx")
 pfd(fin_df)
 il_contracts = fin_df["ContractNumber"].to_list()
 prt(il_contracts)
 # Contracts minv() returned that the reference list does not have
 extra_contracts = [contract for contract in il_contracts if contract not in current_output]
 # Reference contracts that minv() did not return
 not_included = [contract for contract in current_output if contract not in il_contracts]
 print("\nExtra Contracts:")
 prt(extra_contracts)
 print("Not Included Contracts:")
 prt(not_included)
 # Equality here is order-sensitive: both lists must match element for element
 print(f"MATCHING CONTRACTS: {il_contracts == current_output}")
 print(f"Current # contract {len(current_output)} | ILE Processed Contracts: {len(il_contracts)}")
 print(f"# Extra contracts included: {len(extra_contracts)} | # Contracts not included: {len(not_included)}")
--- a/ach_fix.py
+++ b/ach_fix.py
@ -0,0 +1,110 @@
 import os
 import pandas as pd
 from datetime import datetime as dt, timedelta
 import sys, getopt
 import re
 from pathlib import Path
 import time
 import numpy as np
 # Matches contract numbers of the form NNN-NNNNNNN-NNN (three digits, seven
 # digits, three digits, separated by '-').
 # Raw string: the pattern is unchanged, but "\d" is no longer parsed as an
 # (invalid) string escape, which newer Python versions warn about.
 contract_number_regex = r"\d{3}-\d{7}-\d{3}"
 def create_line_divider(breakage_list: list):
    """
    Build a custom data extractor for fixed-width report lines.

    breakage_list defines the character offsets at which a line is split.
    Example
    Given breakage_list [10, 20, 30]
        slot 0 yields characters 0 - 10 of the string
        slot 1 yields characters 10 - 20
        slot 3 (the last slot) yields character 30 to the end of the line

    Returns the extract_line_slot(slot_num, line_string, debug=False) function.
    """
    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
        """
        Pulls data from a line/string using the break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slot text is stripped and every ',' is removed. If what remains
        parses as a float the float is returned, otherwise the cleaned string
        (an empty slot therefore comes back as "").
        When debug is True the slot bounds and the extracted value are printed.
        """
        # We can't have a slot number higher than the number of slots
        assert slot_num < len(breakage_list)+1, "slot_num is past the last slot"
        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        # Text that is not a number stays a string. Only the failed parse is
        # ignored so that unrelated errors (e.g. KeyboardInterrupt) still surface
        try:
            data = float(data)
        except ValueError:
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
 def ach(report: str, save_name: str):
    """
    Extract the contract payment lines and the batch totals from an ACH report
    and write both to an Excel workbook.

    report    : full text of the report
    save_name : path of the workbook; it gets a "data" sheet (one row per
                contract line) and a "Summary" sheet (one row per batch)

    Returns the DataFrame that was written to the "data" sheet.
    """
    lines = report.splitlines()
    # Key order matters: the first six keys are filled from slots 0-5 of a
    # contract line, the last three are filled in once the batch total is found
    extracted_data_dict = {
        "ContractNumber" : [],
        "CustomerName" : [],
        "BankCode" : [],
        "BankNumber": [],
        "AccountNumber" : [],
        "Payment" : [],
        "Batch": [],
        "Lessor": [],
        "PaymentDate": [],
    }
    columns = list(extracted_data_dict.keys())
    # One entry per batch total line; the batch count is not collected
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "total": []
    }
    data_extractor = create_line_divider([19,57,67,82,104])
    bank_number_regex = "\d{9}"
    batch_num_regex = "BATCH \d{4} TOTAL"
    slot_columns = columns[:len(columns)-3]
    for index, text in enumerate(lines):
        # A data line has both a contract number and a bank number in it
        has_contract = re.search(contract_number_regex, text) is not None
        has_bank_number = re.search(bank_number_regex, text) is not None
        if has_contract and has_bank_number:
            for slot, column in enumerate(slot_columns):
                extracted_data_dict[column].append(data_extractor(slot, text))
        # Everything below only applies to a batch total line
        # This sums the contracts by their lessor code. A feature requested by cash apps
        if re.search(batch_num_regex, text) is None:
            continue
        # Batch number is always in characters 96 to 101
        batches["batch_num"].append(text[96:101])
        # Payment date will be 2 lines below that between characters 114 and 125
        batches["payment_date"].append(lines[index+2][114:125])
        # Lessor is just the first three numbers of the most recent contract number
        batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
        # Total is a number given by the report for that batch. ',' is removed so it can become a float
        batches["total"].append(float(text[107:125].strip().replace(",", "")))
        # Every contract read since the previous batch total is still missing its
        # batch values, so pad those three columns up to the current row count
        row_count = len(extracted_data_dict["BankCode"])
        backfill = (
            ("Batch", batches["batch_num"][-1]),
            ("Lessor", batches["lessor"][-1]),
            ("PaymentDate", batches["payment_date"][-1]),
        )
        for column, value in backfill:
            missing = row_count - len(extracted_data_dict[column])
            extracted_data_dict[column].extend([value] * missing)
    # Now the dictionary lists should all be equal lengths and we can create a dataframe
    dataframe = pd.DataFrame(extracted_data_dict)
    summary = pd.DataFrame(batches)
    # Two sheets (data & Summary) go into one workbook, hence the excel writer
    # This also helps with a bug caused by larger dataframes
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        summary.to_excel(writer, index=False, sheet_name="Summary")
    return dataframe
 # Sample ACH reports used to reproduce the problem; only r2 is processed
 r1 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.05.27_ACH_C"
 r2 = "/config/workspace/LEAF/IL Extract SRC/ach_errors/2022.06.03_ACH_C"
 # Undecodable bytes are replaced rather than aborting the read
 report = Path(r2).read_text(errors="replace")
 ach(report, "test_ach_0613.xlsx")
--- a/lbf.py
+++ b/lbf.py
@ -0,0 +1,168 @@
 import os
 import pandas as pd
 from datetime import datetime as dt, timedelta
 import sys, getopt
 import re
 from pathlib import Path
 import time
 import numpy as np
 from pprint import pprint as prt
 # Contract numbers look like '123-1234567-001': 3 digits, 7 digits, 3 digits.
 # Raw string so '\d' reaches the regex engine without being treated as an
 # (invalid) string escape; the pattern text itself is unchanged.
 contract_number_regex = r"\d{3}-\d{7}-\d{3}"
 def dict_lens(dictionary):
    """Print one 'key : length' line for every entry of *dictionary*.

    Debugging aid for checking that the column lists of an extraction
    dictionary all have the same number of items.
    """
    for name, values in dictionary.items():
        print(f"{name} : {len(values)}")
 def create_line_divider(breakage_list: list):
    """
    Build a slot extractor for fixed-width report lines.

    ``breakage_list`` holds the character offsets at which a line is split.
    Slot 0 runs from the start of the line to the first offset, slot ``n``
    runs from offset ``n-1`` to offset ``n`` and the last slot
    (``len(breakage_list)``) runs from the final offset to the end of the line.

    Example
    Given breakage_list [10, 20, 30]
    slot 0 yields characters 0 - 10 of the string,
    slot 1 yields characters 10 - 20,
    slot 3 yields everything from character 30 onwards.

    Returns the ``extract_line_slot`` function bound to ``breakage_list``.
    """
    # Imported here so the block does not rely on the module's import list
    from math import isfinite

    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slot text is stripped and has its ',' removed. If what is left is
        a finite number it is returned as a float, otherwise the cleaned
        string is returned (an empty slot gives "").

        Raises AssertionError when ``slot_num`` is not in
        ``0..len(breakage_list)``.
        """
        # Raised explicitly (instead of an assert statement) so the check still
        # runs under ``python -O``; negative slots are rejected as well because
        # they would silently index the break points from the end.
        if not 0 <= slot_num <= len(breakage_list):
            raise AssertionError(
                f"slot_num must be between 0 and {len(breakage_list)}, got {slot_num}"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # In order to create a float we need to remove the , from the string
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            number = float(data)
        except ValueError:
            # Not numeric text: keep the cleaned string
            number = None
        # float() also accepts words such as "nan", "inf" and "infinity";
        # in a report those are text (e.g. part of a name), not amounts.
        if number is not None and isfinite(number):
            data = number
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
 def lockbox(report: str, save_name: str):
    """
    Extract payment records from a fixed-width lockbox report.

    Every line matching the 'full line' pattern yields one row holding the
    bank payment fields (slots 1-4) and the contract fields (slots 7-8).
    A line matching the 'contract only' pattern yields a row that reuses the
    bank payment fields of the previous row and reads only the contract
    fields. The customer name for a row is taken from slot 7 of the first
    following line that matches the customer-name pattern; if the report
    ends before such a line is found the name is stored as "".

    report: full text of the report.
    save_name: path of the Excel file the rows are written to.

    Returns the rows as a pandas DataFrame.
    Raises IndexError if a contract-only line appears before any full line,
    because there is no previous bank payment data to copy.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "CustomerName" : [],
        "PaymentDate" : [],
        "InvoiceNumber" : [],
        "CheckNumber" : [],
        "InvoicePayment" : [],
        "ContractNumber" : [],
        "ContractPayment" : [],
    }
    # (column, slot) pairs: where each column's data is found on a report line
    bank_payment_records = (
        ("PaymentDate", 1),
        ("InvoiceNumber", 2),
        ("CheckNumber", 3),
        ("InvoicePayment", 4),
    )
    infolease_payment_records = (
        ("ContractNumber", 7),
        ("ContractPayment", 8),
    )
    # Regular expressions used to find relevant data lines. Raw strings keep
    # the backslashes intact; compiling once avoids re-parsing them per line.
    full_line = re.compile(r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1")
    contract_only_line = re.compile(r"\s{90}\d.{7}1\d{2}-")
    cust_name_line = re.compile(r"\s{98}.{28}\D*")
    # Slots are ranges of characters delimited by the offsets given here
    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
    for index, line in enumerate(lines):
        # We can skip empty lines
        if not line:
            continue
        if full_line.search(line):
            # Complete record: read the bank payment data from this line
            for column, slot in bank_payment_records:
                extracted_data_dict[column].append(data_extractor(slot, line))
        elif contract_only_line.search(line):
            # Contract-only record: the bank payment data of the previous
            # entry applies to this contract as well
            for column, _ in bank_payment_records:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
        else:
            # Neither kind of record: irrelevant line
            continue
        # Both kinds of record carry their own contract data
        for column, slot in infolease_payment_records:
            extracted_data_dict[column].append(data_extractor(slot, line))
        # The customer name typically sits 1 line under the data but can be
        # further down when cut off by a page end. The search is bounded by
        # the end of the report so a missing name cannot raise IndexError.
        name_index = next(
            (j for j in range(index + 1, len(lines)) if cust_name_line.search(lines[j])),
            None,
        )
        # Always append something so every column keeps the same length
        customer_name = "" if name_index is None else data_extractor(7, lines[name_index])
        extracted_data_dict["CustomerName"].append(customer_name)
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
 def lb2(report:str, save_name:str):
    """
    Extract payment records from a fixed-width lockbox report.

    A line that starts with a SEQ number followed by a payment date
    (e.g. ' 197     05/10/2022') yields one row with slots 0-8 read from that
    line. Any other line containing a contract number yields a row that
    copies the first six columns of the previous row and reads slots 6-8
    from the line. The customer name is the stripped text of the next line,
    unless the next line is blank and "PAGE" appears three lines down; then
    it is the first line, from the current one onwards, that starts with 98
    whitespace characters followed by at least 34 more characters.
    Lines past the end of the report are treated as empty, and a name that
    cannot be found is stored as "".

    report: full text of the report.
    save_name: path of the Excel file the rows are written to.

    Returns the rows as a pandas DataFrame.
    Raises IndexError if a contract-only line appears before any full line,
    because there is no previous row to copy from.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "SEQ" : [],
        "PYMT DATE" : [],
        "INV NUM" : [],
        "CHECK NUMBER" : [],
        "PAYMENT AMOUNT" : [],
        "NOTE" : [],
        "IL SEQ" : [],
        "CONTRACT NUM" : [],
        "IL PAYMENT AMOUNT" : [],
        "CUST NAME" : [],
    }
    columns = list(extracted_data_dict.keys())
    # Every column except the customer name maps to the slot with the same index
    slot_columns = columns[:-1]
    data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
    # Raw strings keep the backslashes intact; compiled once for the whole report
    seq_and_date = re.compile(r"(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}")
    contract_number = re.compile(contract_number_regex)
    name_only_line = re.compile(r"\s{98}.{34}")

    def line_at(position):
        # Lines past the end of the report count as empty instead of raising
        return lines[position] if position < len(lines) else ""

    for index, line in enumerate(lines):
        if seq_and_date.match(line):
            # Full record: add all of the data points except customer name
            for slot, column in enumerate(slot_columns):
                extracted_data_dict[column].append(data_extractor(slot, line))
        elif contract_number.search(line):
            # Sometimes there are multiple infolease payments for a single
            # bank record: reuse the bank data of the previous complete line
            for column in columns[:6]:
                extracted_data_dict[column].append(extracted_data_dict[column][-1])
            # Then add the new data for the infolease contract
            for slot in range(6, len(slot_columns)):
                extracted_data_dict[slot_columns[slot]].append(data_extractor(slot, line))
        else:
            continue
        # A matched record needs a customer name to associate with it.
        # The name is on a later page when the next line is blank and the
        # word "PAGE" appears three lines under the current line.
        if line_at(index + 1).strip() == "" and "PAGE" in line_at(index + 3):
            # Look for a bunch of whitespace then some writing; bounded by the
            # end of the report so a missing name cannot raise IndexError
            name_index = next(
                (j for j in range(index, len(lines)) if name_only_line.match(lines[j])),
                None,
            )
            # The name is the only thing on that line
            customer_name = "" if name_index is None else lines[name_index].strip()
        else:
            # Otherwise the name is on the next line (even if that line is blank)
            customer_name = line_at(index + 1).strip()
        extracted_data_dict["CUST NAME"].append(customer_name)
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
 # Ad-hoc driver: these statements run whenever this module is executed or imported.
 # Paths of two sample lockbox reports, each converted to its own workbook below.
 r1 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.10_LOCKBOX_094_C"
 r2 = "/config/workspace/LEAF/IL Extract SRC/lb_errors/2022.05.11_LOCKBOX_094_C"
 # errors="replace" substitutes undecodable bytes instead of raising a decode error
 with open(r1, errors="replace") as ifile:
    # Read the whole report into memory as a single string
    report = ifile.read()
 # Parse the first report; lb2() writes "test_lb_0510.xlsx" and returns a dataframe (ignored here)
 lb2(report, "test_lb_0510.xlsx")
 with open(r2, errors="replace") as ifile:
    # `report` is rebound to the text of the second file
    report = ifile.read()
 # Parse the second report into "test_lb_0511.xlsx"
 lb2(report, "test_lb_0511.xlsx")
--- a/main.py
+++ b/main.py
@ -22,7 +22,6 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
        self.outputFileButton.clicked.connect(self.setOutput)
        self.processReportButton.clicked.connect(self.process_selection)
        self.openReportButton.clicked.connect(self.to_clipboard)
    def getfile(self):
        inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file') 
--- a/mainWindow_new.py
+++ b/mainWindow_new.py
@ -21,7 +21,7 @@ class Ui_MainWindow(object):
        icon2 = QtGui.QIcon()
        icon2.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        iconCopy = QtGui.QIcon()
-        iconCopy.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
+        iconCopy.addPixmap(QtGui.QPixmap("copy.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        iconProcess = QtGui.QIcon()
        iconProcess.addPixmap(QtGui.QPixmap("process.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        MainWindow.setWindowIcon(icon)
--- a/man_inv_test.xlsx
+++ b/man_inv_test.xlsx
--- a/test_ach_0613.xlsx
+++ b/test_ach_0613.xlsx
--- a/test_lb_0510.xlsx
+++ b/test_lb_0510.xlsx
--- a/test_lb_0511.xlsx
+++ b/test_lb_0511.xlsx
--- a/test_lb_0613.xlsx
+++ b/test_lb_0613.xlsx