Large dataframes are broken

4 years ago · 87667c0fa3
commit 87667c0fa3
3 changed files with 788 additions and 0 deletions
--- a/ILExtract.py
+++ b/ILExtract.py
@ -0,0 +1,525 @@
+import os
+import pandas as pd
+from datetime import datetime as dt, timedelta
+import sys, getopt
+import re
+from pathlib import Path
+import time
+
+# Pattern for an InfoLease contract number: 3 digits, 7 digits, 3 digits, separated by dashes.
+# Raw string so that "\d" reaches the regex engine without an invalid-escape warning.
+contract_number_regex = r"\d{3}-\d{7}-\d{3}"
+
+
+class ILReport:
+    """
+    InfoLease Report class will be used to work with the files.
+    It makes it easier to add new reports to the workflow and to make it more clear where
+    the reports are coming from. It also helps with tracking reports that may not be ready yet.
+
+    Parameters
+        location            : path of the InfoLease report file to read
+        extraction_function : callable taking the report text and returning a dataframe
+        output_location     : where output is written; defaults to the input file's directory
+        output_name         : base name for output; defaults to the input file name with
+                              yesterday's date (formatted YYYY.MM.DD) removed
+    """
+    def __init__(self, location, extraction_function = None, output_location = None, output_name = None):
+        # The location where the InfoLease report is stored
+        self.location = location
+        # If output location not specified, save to the input location
+        if output_location is None:
+            self.output_location = Path(location).parent.absolute()
+        else:
+            self.output_location = output_location
+        # This is optional but has a default
+        if output_name is None:
+            # Get the file name of the input and remove the date
+            yesterday = (dt.now() - timedelta(days=1)).strftime('%Y.%m.%d')
+            self.output_name = os.path.basename(f"{self.location}").replace(yesterday, "")
+        else:
+            self.output_name = output_name
+        # The function used to extract the data from the report
+        self.x_method = extraction_function
+        # Tracks whether the data was successfully extracted
+        self.successful = False
+
+    def _extract(self):
+        """
+        Reads the report file and runs the extraction function on its text.
+        Returns the dataframe, or None after printing the reason when the file cannot
+        be opened, the extraction raises, or the result contains no rows.
+        self.successful is reset to False before anything is attempted.
+        """
+        self.successful = False
+        try:
+            # errors = 'replace' deals with non UTF-8 characters instead of raising
+            with open(self.location, errors="replace") as ifile:
+                report = ifile.read()
+        except IOError as ioe:
+            print(f"Failed to open file: {self.location}\n{ioe}")
+            return None
+        try:
+            # Run the associated method to extract the data and get the dataframe
+            dataframe = self.x_method(report)
+            row_count = len(dataframe)
+        except Exception as e:
+            print(f"{self.output_name} failed to process:\n{e}")
+            return None
+        # Explicit check rather than assert (asserts are removed under -O).
+        # A single extracted row is valid data; only zero rows counts as empty.
+        if row_count < 1:
+            print(f"Data Length Error: {self.output_name} is empty:\n{dataframe}")
+            return None
+        return dataframe
+
+    def run(self) -> int:
+        """
+        This method is what actually runs the report. It uses the specified extraction
+        function to create and save an excel document.
+        SUCCESS returns 0
+        ERROR returns 1
+        Failure is also noted by self.successful == False
+        """
+        dataframe = self._extract()
+        if dataframe is None:
+            return 1
+        try:
+            # Save the dataframe as an excel document
+            dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False)
+        except Exception as e:
+            print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}")
+            return 1
+        self.successful = True
+        return 0
+
+    def process(self):
+        """
+        Extracts the report and hands the dataframe back to the caller without saving it.
+        Returns the dataframe on success, or the integer 1 on failure (callers must check).
+        """
+        dataframe = self._extract()
+        if dataframe is None:
+            return 1
+        self.successful = True
+        return dataframe
+   
+
+def create_line_divider(breakage_list: list):
+    """
+    This allows for the creation of a custom data extractor
+    Breakage list defines the split points that will be used for the line
+    Example
+    Given breakage_list [10, 20, 30]
+    using slot_num 0 in the resulting extract_line_slot will yield
+    characters 0 - 10 from the string.
+    Slot 1 would give characters 10 - 20
+    The final slot (slot_num == len(breakage_list)) runs to the end of the line
+    """
+    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
+        """
+        Pulls data from a line/string using break points defined by the
+        parent function.
+        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
+        The slot text is stripped and has commas removed; if the result parses
+        as a number it is returned as a float, otherwise as a string.
+        Raises IndexError when slot_num is beyond the last slot.
+        """
+        # Explicit check instead of assert so it still runs under python -O
+        if slot_num > len(breakage_list):
+            raise IndexError(f"slot {slot_num} out of range for {len(breakage_list) + 1} slots")
+        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
+        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
+        data = line_string[low_range:high_range].strip().replace(",", "")
+        try:
+            data = float(data)
+        except ValueError:
+            # Not numeric: keep the cleaned text
+            pass
+        if debug:
+            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
+        return data
+    return extract_line_slot
+
+
+######################################################################################################################
+#                                                                                                                    #
+#                   EXTRACTION FUNCTIONS: used to pull data out of specific InfoLease report types                   #
+#                                                                                                                    #
+######################################################################################################################
+"""
+COMMON EXTRACTION COMPONENTS/FEATURES:
+    - lines = report.splitlines() : splits the reports into a list of lines (based on \n line breaks in document)
+
+    - extracted_data_dict : this is a dictionary that will hold the extracted data and will be used to create the dataframe
+
+    - columns = list(extracted_data_dict.keys()) : breaks the extracted_data_dict into a list of its keys (excel column heads)
+
+    - data_extractor = create_line_divider([#,#,#,#,#]): This creates a function we can use to pull data from a line based on 
+    its 'slot position'. A slot position is the characters between the numbers specified in the list passed into the function
+
+    - for line in enumerate(lines): iterates through each line in the document. Line is a tuple of (line number, line string)
+    having the line number can be very useful when we need to access data in adjacent lines 
+
+    - line# = list(zip(columns[#:#],[i for i in range(#,#)])): This creates a list with the tuple (column name, slot number).
+    It allows us to iterate through this list and make sure the correct data slots are being used for each column/key in the 
+    data dictionary
+
+COMMON REGEX COMPONENTS
+\d : any digit [0-9]
+\D : any character that is not a digit
+\s : whitespace
+.  : any character besides newline (\n)
+{#}: # number of the preceding character
+*  : 0 or more repetitions of the preceding character
+"""
+
+
+def ach(report: str):
+    """
+    Extracts one row per report line that contains both a contract number and a
+    9 digit number (used here as the bank number marker).
+    Each matching line is cut into fixed-width slots, one per output column.
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    extracted_data_dict = {
+    "ContractNumber" : [],
+    "CustomerName" : [],
+    "BankCode" : [],
+    "BankNumber": [],
+    "AccountNumber" : [],
+    "Payment" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    data_extractor = create_line_divider([19,57,67,82,104])
+    bank_number_regex = r"\d{9}"
+    for line_text in report.splitlines():
+        if re.search(contract_number_regex, line_text) is None:
+            continue
+        if re.search(bank_number_regex, line_text) is None:
+            continue
+        # Slot n of the line belongs to column n
+        for slot, column in enumerate(columns):
+            extracted_data_dict[column].append(data_extractor(slot, line_text))
+    return pd.DataFrame(extracted_data_dict)
+
+
+def disposition(report: str):
+    """
+    Extracts one row per report line whose first slot holds a contract number.
+    The first ten columns come from fixed-width slots of that line; the customer
+    name is the stripped text of the line directly below it.
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+    "ContractNumber" : [],
+    "Amount Rec" : [],
+    "Trans Num" : [],
+    "Date RCVD": [],
+    "Date Posted" : [],
+    "Last Pymt Due" : [],
+    "Date Due" : [],
+    "Residual Amt" : [],
+    "Term Date" : [],
+    "Total Pastdue" : [],
+    "Customer Name" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    data_extractor = create_line_divider([15,32,41, 51, 61, 79,88, 103, 114])
+    # Every column except the last ("Customer Name") is read from the data line
+    slot_columns = columns[:-1]
+    for index, line_text in enumerate(lines):
+        contract_slot = data_extractor(0, line_text)
+        # The extractor returns a float for numeric text; re.search would raise on it
+        if not isinstance(contract_slot, str):
+            continue
+        if re.search(contract_number_regex, contract_slot) is None:
+            continue
+        for slot, column in enumerate(slot_columns):
+            extracted_data_dict[column].append(data_extractor(slot, line_text))
+        # The name sits on the following line; use an empty name if the report ends here
+        name_line = lines[index + 1] if index + 1 < len(lines) else ""
+        extracted_data_dict["Customer Name"].append(name_line.strip())
+    return pd.DataFrame(extracted_data_dict)
+
+
+def gainloss(report: str):
+    """
+    Extracts gain/loss records. A record is located by a line whose first slot
+    holds a contract number and whose second slot is numeric; the record spans
+    the line above it through the four lines below it (six lines in total).
+    After extraction the 'DISPOSITION CODE' text is split at its first space into
+    'DISPOSITION CODE' and a new 'DISPOSITION DESC' column (added as the last column).
+    Raises ValueError if a record does not have all six of its lines.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+        'REM RENT RCVB' :  [],
+        'GUAR RESIDUAL' :  [],
+        'ASSET VAL' :  [],
+        'EQUITY ADDON' :  [],
+        'CURR INT RCVB' :  [],
+        'MISC G/L' :  [],
+        'BLENDED INC' :  [],
+        'CONTRACT NUMBER' :  [],
+        'CURR RENT RCVB' :  [],
+        'RESIDUAL' :  [],
+        'END/SEC DEP' :  [],
+        'SALES TAX' :  [],
+        'INVENT CHANGE' :  [],
+        'NET RESERVE' :  [],
+        'LATE CHGS' :  [],
+        'CUSTOMER NAME' :  [],
+        'UNEARNED FIN' :  [],
+        'UNAMORT RES' :  [],
+        'MISC' :  [],
+        'MISC TAX' :  [],
+        'CASH RECEIVED' :  [],
+        'RCV OFFSET' :  [],
+        'GAIN/LOSS' :  [],
+        'DISPOSITION CODE' :  [],
+        # 'DISPOSITION DESC' is derived from 'DISPOSITION CODE' after extraction.
+        # It must not be listed here: it has no slot of its own, and a stray string
+        # literal on this line would be concatenated into the next key's name.
+        'UNEARNED IDC' :  [],
+        'UNPAID INT' :  [],
+        'PENALTY FEE' :  [],
+        'UNPAID ACCRD' :  [],
+        'RENEWAL RCVBL' :  [],
+        'DEF REN INC' :  [],
+        'DEF REN INT' :  [],
+        'EARNED IDC' :  [],
+        'GST BOOK G/L' :  [],
+        'UNRECOG GST' :  [],
+        'INT EARNED' :  [],
+        'OVER/SHORT' :  [],
+        'OPER RCVB' :  [],
+        'OPER BASIS' :  [],
+        'CTD OPER DEPR' :  [],
+    }
+    # Index of the last column on each record line:
+    # L0: BlendedInc 6
+    # L1: Late CHGS 14
+    # L2: Gain/Loss 22
+    # L3: Def Ren Int 30
+    # L4: Over/Short 35
+    # L5: CTD OPER
+    columns = list(extracted_data_dict.keys())
+    # One entry per record line: the (column name, slot number) pairs to read from it
+    row_layouts = [
+        list(zip(columns[0:7], range(1, 8))),
+        list(zip(columns[7:15], range(0, 8))),
+        list(zip(columns[15:23], range(0, 8))),
+        list(zip(columns[23:31], range(0, 8))),
+        list(zip(columns[31:36], [1, 2, 4, 5, 7])),
+        list(zip(columns[36:], range(1, 4))),
+    ]
+    data_extractor = create_line_divider([27,43,58,74,88,105,120])
+    for index, line_text in enumerate(lines):
+        contract_slot = data_extractor(0, line_text)
+        # The extractor returns a float for numeric text; re.search would raise on it
+        if not isinstance(contract_slot, str):
+            continue
+        if re.search(contract_number_regex, contract_slot) is None:
+            continue
+        if not isinstance(data_extractor(1, line_text), float):
+            continue
+        # A negative slice start would silently wrap around, so check the window first
+        if index < 1 or index + 5 > len(lines):
+            raise ValueError(f"Incomplete gain/loss record at report line {index + 1}")
+        data_section = lines[index-1:index+5]
+        for row_text, layout in zip(data_section, row_layouts):
+            for column, slot in layout:
+                extracted_data_dict[column].append(data_extractor(slot, row_text))
+
+    df = pd.DataFrame(extracted_data_dict)
+    # The Accounting team wanted the disposition code split into number and description so...
+    disp_code = []
+    disp_description = []
+    for d in df['DISPOSITION CODE'].to_list():
+        if isinstance(d, str):
+            code, _, description = d.partition(" ")
+        else:
+            # A purely numeric slot comes back as a float and has no description part
+            code, description = d, ""
+        disp_code.append(code)
+        disp_description.append(description)
+    df["DISPOSITION CODE"] = disp_code
+    df["DISPOSITION DESC"] = disp_description
+    return df
+
+# Works for Net-inv-loans & NIV-after
+def net_invest_trial_balance(report: str):
+    """
+    Extracts net investment trial balance records. A record is located by a line
+    whose first slot is text containing a contract number; values are read from
+    the line above it, the line itself and the two lines below it.
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    extracted_data_dict = {
+        'CUSTOMER NAME' :  [],
+        'CURR INT RCVB' :  [],
+        'UNEARNED BLENDED' :  [],
+        'BLEND NET INV' :  [],
+        'LEASE NUMBER' :  [],
+        'GROSS CONTRACT' :  [],
+        'CURR RENT RCVB' :  [],
+        'UNEARN FIN' :  [],
+        'END DEPOSIT' :  [],
+        'SEC DEPOSIT' :  [],
+        'LEASE PYMTS' :  [],
+        'TOTAL' :  [],
+        'CONTRACT STAT' :  [],
+        'PAYMENTS RCVD' :  [],
+        'REM RENT RCVB' :  [],
+        'UNEARN RESID' :  [],
+        'PROV LOSS' :  [],
+        'NET RESERVE' :  [],
+        'UNEARN INC' :  [],
+        'BAL REMAINING' :  [],
+        'RESIDUAL' :  [],
+        'UNPAID INT' :  [],
+        'NET INV' :  [],
+        'UNEARNED IDC' :  [],
+    }
+    columns = list(extracted_data_dict.keys())
+    # One entry per record line: the (column name, slot number) pairs to read from it
+    row_layouts = (
+        tuple(zip(columns[0:4], (0, 3, 4, 5))),
+        tuple(zip(columns[4:12], range(8))),
+        tuple(zip(columns[12:19], range(7))),
+        tuple(zip(columns[19:], range(1, 6))),
+    )
+    data_extractor = create_line_divider([18,35,53,67,87,106,117])
+    lines = report.splitlines()
+    for index, line_text in enumerate(lines):
+        lease_slot = data_extractor(0, line_text, False)
+        # Numeric slots come back as floats and cannot hold a contract number
+        if not isinstance(lease_slot, str):
+            continue
+        if re.search(contract_number_regex, lease_slot) is None:
+            continue
+        data_section = lines[index - 1:index + 4]
+        for row_number, layout in enumerate(row_layouts):
+            row_text = data_section[row_number]
+            for column, slot in layout:
+                extracted_data_dict[column].append(data_extractor(slot, row_text))
+    return pd.DataFrame(extracted_data_dict)
+
+
+def lockbox(report: str):
+    """
+    Extracts lockbox payment rows. Two kinds of line produce a row:
+      - a "full" line carrying both the bank payment fields and the contract fields
+      - a "contract only" line, which reuses the bank payment fields of the previous row
+    The customer name is taken from the first name-shaped line found below the row.
+    Raises IndexError if a contract only line appears before any full line.
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+        "CustomerName" : [],
+        "PaymentDate" : [],
+        "InvoiceNumber" : [],
+        "CheckNumber" : [],
+        "InvoicePayment" : [],
+        "ContractNumber" : [],
+        "ContractPayment" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    # (column name, slot number) pairs for the two groups of fields on a data line
+    bank_payment_records = list(zip(columns[1:5], [1,2,3,4]))
+    infolease_payment_records = list(zip(columns[5:], [7,8]))
+
+    # Below are the Regular Expressions used to find relevant data lines
+    full_line = r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1"
+    contract_only_line = r"\s{90}\d.{7}1\d{2}-"
+    cust_name_line = r"\s{98}.{28}\D*"
+    # The data extractor allows us to extract data from the report using slots
+    # Slots are ranges of characters denoted by the list fed into the creation function
+    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
+    for index, line_text in enumerate(lines):
+        # We can skip empty lines
+        if len(line_text) == 0:
+            continue
+        if re.search(full_line, line_text):
+            # Full line of data: read the bank payment fields from this line
+            for column, slot in bank_payment_records:
+                extracted_data_dict[column].append(data_extractor(slot, line_text))
+        elif re.search(contract_only_line, line_text):
+            # Only contract data: the bank payment fields of the previous entry apply
+            for column, _ in bank_payment_records:
+                extracted_data_dict[column].append(extracted_data_dict[column][-1])
+        else:
+            # Neither kind of data line, so it is irrelevant
+            continue
+        for column, slot in infolease_payment_records:
+            extracted_data_dict[column].append(data_extractor(slot, line_text))
+        # Move down one line at a time looking for the customer name.
+        # It is typically 1 line under the data but can be further if cut off by a page end.
+        # The search stops at the end of the report instead of indexing past it.
+        name_index = index + 1
+        while name_index < len(lines) and re.search(cust_name_line, lines[name_index]) is None:
+            name_index += 1
+        if name_index < len(lines):
+            customer_name = data_extractor(7, lines[name_index])
+        else:
+            # No name line before the end of the report: keep the row, leave the name blank
+            customer_name = ""
+        extracted_data_dict["CustomerName"].append(customer_name)
+    return pd.DataFrame(extracted_data_dict)
+
+
+def minv(report: str):
+    """
+    Extracts one row per report line that contains a contract number.
+    The line is cut into fixed-width slots and slot n is stored under column n.
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    extracted_data_dict = {
+        "ContractNumber" : [],
+        "UTAB_OIC_DUE" : [],
+        "RentalDue" : [],
+        "UTAB_OIC_PYMT" : [],
+        "ChargeType" : [],
+        "OutstandBalance" : [],
+        "BizSegment" : [],
+        "BookingDate" : [],
+        "Branch" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
+    for line_text in report.splitlines():
+        if re.search(contract_number_regex, line_text) is None:
+            continue
+        for slot, column in enumerate(columns):
+            extracted_data_dict[column].append(data_extractor(slot, line_text, debug=False))
+    # All the list lengths need to be the same so if anything was missed it will fail to build
+    return pd.DataFrame(extracted_data_dict)
+
+# Good for PUB_WIRES, VMCC, PBP_EPAY, returned check
+def payment_transactions(report: str):
+    """
+    Extracts payment transaction rows. A row is located by a line whose second
+    slot is text containing a contract number. The first nine columns are read
+    from that line; the customer name, transaction number and invoice number are
+    read from the line directly below it (left blank if the report ends there).
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    lines = report.splitlines()
+    data_extractor = create_line_divider([6,33,52,62,80,89,110,121])
+    extracted_data_dict = {
+    'SEQ' :  [],
+    'ACCOUNT NUMBER' :  [],
+    'PYMT METHOD' :  [],
+    'DATE RCVD' :  [],
+    'AMOUNT' :  [],
+    'REF NO': [],
+    'PAYMENT MEMO' :  [],
+    'PYMT TYPE' :  [],
+    'CHECK NO' :  [],
+    'CUSTOMER NAME' :  [],
+    'TRANSACTIONS NUM': [],
+    'INV NO' : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    # The last three columns are filled from the following line, not from slots
+    slot_columns = columns[:-3]
+    transaction_num_regex = r"\d{8}"
+    for index, line_text in enumerate(lines):
+        account_slot = data_extractor(1, line_text, False)
+        # Numeric slots come back as floats and cannot hold a contract number
+        if not isinstance(account_slot, str):
+            continue
+        if re.search(contract_number_regex, account_slot) is None:
+            continue
+        for slot, column in enumerate(slot_columns):
+            extracted_data_dict[column].append(data_extractor(slot, line_text))
+        # Use an empty detail line if the data line is the last line of the report
+        detail_line = lines[index + 1] if index + 1 < len(lines) else ""
+        tnum_match = re.search(transaction_num_regex, detail_line)
+        tnum = tnum_match.group(0) if tnum_match else ""
+        extracted_data_dict["TRANSACTIONS NUM"].append(tnum)
+        extracted_data_dict['CUSTOMER NAME'].append(detail_line[6:37].strip())
+        extracted_data_dict['INV NO'].append(detail_line[79:90].strip())
+    return pd.DataFrame(extracted_data_dict)
+
+
+def renewal_net_invest_trial_balance(report: str):
+    """
+    Extracts renewal net investment trial balance records. A record is located
+    by a line whose first slot is text containing a contract number; values are
+    read from the line above it, the line itself and the line below it.
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    extracted_data_dict = {
+        'CUSTOMER NAME' :  [],
+        'TYPE' :  [],
+        'GROSS RENEWAL' :  [],
+        'CUR RENT RCVB' :  [],
+        'UNEARNED RIN' :  [],
+        'REMAINING RES' :  [],
+        'LEASE PYMTS' :  [],
+        'CONTRACT NUMBER' :  [],
+        'RENEWAL' :  [],
+        'PAYMENTS RCVD' :  [],
+        'REM RENT RCVB' :  [],
+        'UNPAID RES' :  [],
+        'SECURITY DEP' :  [],
+        'NET INVEST' :  [],
+        'UNEARN INCOME' :  [],
+        'TOTAL' :  [],
+        'REMAINING BAL' :  [],
+        'FINANCED RES' :  [],
+    }
+    columns = list(extracted_data_dict.keys())
+    # One entry per record line: the (column name, slot number) pairs to read from it
+    row_layouts = (
+        tuple(zip(columns[0:7], (0, 1, 2, 3, 4, 5, 7))),
+        tuple(zip(columns[7:16], range(9))),
+        tuple(zip(columns[16:], (3, 4))),
+    )
+    data_extractor = create_line_divider([21,29,43,58,71,88,99,113])
+    lines = report.splitlines()
+    for index, line_text in enumerate(lines):
+        contract_slot = data_extractor(0, line_text, False)
+        # Numeric slots come back as floats and cannot hold a contract number
+        if not isinstance(contract_slot, str):
+            continue
+        if re.search(contract_number_regex, contract_slot) is None:
+            continue
+        data_section = lines[index - 1:index + 4]
+        for row_number, layout in enumerate(row_layouts):
+            row_text = data_section[row_number]
+            for column, slot in layout:
+                extracted_data_dict[column].append(data_extractor(slot, row_text))
+    return pd.DataFrame(extracted_data_dict)
+
+
+def unapplied(report: str):
+    """
+    Extracts unapplied payment rows. A row is located by a line whose first slot
+    contains a 7 digit number and whose fourth slot contains a MM/DD/YYYY style date.
+    The first nine columns are read from that line (slots 0-8) and the remaining
+    eight from slots 1-8 of the line directly below it (blank if the report ends there).
+    Returns a dataframe with the columns listed in extracted_data_dict.
+    """
+    lines = report.splitlines()
+    extracted_data_dict = {
+    "Trans Num" : [],
+    "ContractNumber" : [],
+    "CheckNum" : [],
+    "Date RCVD" : [],
+    "Asset ID": [],
+    "Reversed Amt" : [],
+    "Branch" : [],
+    "Unapplied Susp Acct" : [],
+    "PaymentMemo" : [],
+    "Payers Name" : [],
+    "Batch Num" : [],
+    "Posting Date" : [],
+    "Unapplied Amt" : [],
+    "Rev Post Date" : [],
+    "Ref Num" : [],
+    "Check Amt" : [],
+    "Reason Code" : [],
+    }
+    columns = list(extracted_data_dict.keys())
+    first_row_columns = columns[:9]
+    second_row_columns = columns[9:]
+    data_extractor = create_line_divider([9,25, 38, 50, 65, 80, 89, 108])
+    trans_num_regex = r"\d{7}"
+    date_regex = r"\d{2}/\d{2}/\d{4}"
+    # Iterate through the lines one at a time to look for relevant data
+    # Use enumerate so that we know which line we're currently working on,
+    # which lets us read the second half of the record from the line that follows
+    for index, line_text in enumerate(lines):
+        # str() because numeric slots come back from the extractor as floats
+        if re.search(trans_num_regex, str(data_extractor(0, line_text, debug=False))) is None:
+            continue
+        if re.search(date_regex, str(data_extractor(3, line_text, debug=False))) is None:
+            continue
+        for slot, column in enumerate(first_row_columns):
+            extracted_data_dict[column].append(data_extractor(slot, line_text))
+        # Use an empty second line if the data line is the last line of the report
+        second_line = lines[index + 1] if index + 1 < len(lines) else ""
+        # Slot 0 of the second line is not used; its columns start at slot 1
+        for slot, column in enumerate(second_row_columns, start=1):
+            extracted_data_dict[column].append(data_extractor(slot, second_line))
+    return pd.DataFrame(extracted_data_dict)
+
--- a/main.py
+++ b/main.py
@ -0,0 +1,123 @@
+from mainWindow_new import Ui_MainWindow
+import sys
+import os 
+import pandas as pd 
+from PyQt5 import QtWidgets
+from datetime import datetime as dt
+import ILExtract as ilx
+
+
+class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
+    """
+    Main application window. Lets the user choose an InfoLease report file and a
+    report type, extracts the report to a CSV file, previews the first rows and
+    copies the saved output to the clipboard.
+    """
+    # Maps the report type text shown in reportTypeCB to its extraction function
+    EXTRACTORS = {
+        "ACH": ilx.ach,
+        "Disposition": ilx.disposition,
+        "Gain Loss": ilx.gainloss,
+        "Lock Box": ilx.lockbox,
+        "Minv_C": ilx.minv,
+        "Net Inv. Loans": ilx.net_invest_trial_balance,
+        "NI Renewal": ilx.renewal_net_invest_trial_balance,
+        "NIV After": ilx.net_invest_trial_balance,
+        "PBP Epay": ilx.payment_transactions,
+        "Unapplied": ilx.unapplied,
+        "VMCC": ilx.payment_transactions,
+        "Wires": ilx.payment_transactions,
+        "Returns": ilx.payment_transactions,
+    }
+
+    def __init__(self, *args, obj=None, **kwargs):
+        super(MainWindow, self).__init__(*args, **kwargs)
+        self.setupUi(self)
+
+        self.inputFile = ""
+        self.outputFile = ""
+        self.rtp = False # Ready to Process
+        self.ofa = False # Output file ready
+
+        # Actions
+        self.inputFileButton.clicked.connect(self.getfile)
+        self.outputFileButton.clicked.connect(self.setOutput)
+        self.processReportButton.clicked.connect(self.process_selection)
+        self.openReportButton.clicked.connect(self.to_clipboard)
+
+    def getfile(self):
+        """
+        Asks the user for the input report, shows its text in the preview pane and
+        proposes an output CSV path next to it. Warns when the last word of the
+        selected report type does not appear in the chosen file path.
+        Returns '' when the dialog is cancelled.
+        """
+        inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file')
+        self.inputFileLE.setText(inFile[0])
+        if inFile[0] == '':
+            return ''
+        print(f"Input File: {inFile}")
+        with open(inFile[0], errors="replace") as inF:
+            txt = inF.read()
+            print(txt)
+            self.inputFilePreview.setText(txt)
+        self.inputFile = inFile[0]
+        report_type = self.reportTypeCB.currentText()
+        # Directory part of the input path (everything before the file name)
+        inFileEnd = inFile[0].split('/')[-1]
+        outputRoot = self.inputFile.removesuffix(inFileEnd)
+        self.outputFile = f"{outputRoot}{report_type}_{dt.now().strftime('%Y%m%d_%H%M')}.csv"
+        self.outputFileLE.setText(self.outputFile)
+        type_keyword = report_type.split(" ")[-1].lower()
+        if type_keyword not in self.inputFile.lower():
+            print("Possibly wrong file type")
+            warning = QtWidgets.QMessageBox()
+            warning.setWindowTitle("Warning: File Type Mis-Match")
+            warning.setText(
+                f"Selected report type is {report_type} but input file did not contain '{type_keyword}'!\n"
+                "Make sure you select the correct report type before processing!"
+            )
+            warning.exec()
+        self.check_ready_to_process()
+
+    def setOutput(self):
+        """
+        Asks the user for an output file name and appends a timestamp to it.
+        The extension is .csv because process_selection always writes CSV data.
+        Returns '' when the dialog is cancelled.
+        """
+        outFile = QtWidgets.QFileDialog.getSaveFileName(self, "Output file name")
+        if outFile[0] == '':
+            return ''
+        # Build the name once so the text box and the stored path cannot differ
+        output_path = f"{outFile[0]}__{dt.now().strftime('%Y%m%d_%H_%M')}.csv"
+        self.outputFileLE.setText(output_path)
+        print(f"Output: {outFile}")
+        self.outputFile = output_path
+        self.check_ready_to_process()
+
+    def check_ready_to_process(self):
+        """Enables the process button once both an input and an output file are set."""
+        self.rtp = (self.inputFile != "") and (self.outputFile != "")
+        if self.rtp:
+            self.processReportButton.setEnabled(True)
+
+    def process_selection(self):
+        """
+        Runs the extraction function for the selected report type, writes the
+        result to self.outputFile as CSV and previews up to the first 500 rows.
+        Any failure is reported to the user in a message box.
+        """
+        report_type = self.reportTypeCB.currentText()
+        try:
+            # An unknown report type raises KeyError and is reported like any other failure
+            extract_function = self.EXTRACTORS[report_type]
+            dataframe = ilx.ILReport(
+                location= self.inputFile,
+                extraction_function=extract_function,
+                output_location=self.outputFile,
+                ).process()
+            # process() returns the integer 1 instead of a dataframe when it fails
+            if not isinstance(dataframe, pd.DataFrame):
+                raise ValueError(f"No data extracted from {self.inputFile}")
+            dataframe.to_csv(self.outputFile, index=False)
+            # Only a slice is rendered so the preview pane is not handed the full table
+            smallDF = dataframe.iloc[0:500,:]
+            self.inputFilePreview.setText(smallDF.to_html(index=False))
+            self.openReportButton.setEnabled(True)
+        except Exception as e:
+            print(f"Unable to process {self.inputFile}:\n{e}")
+            error = QtWidgets.QMessageBox()
+            error.setWindowTitle('Error Processing File!')
+            error.setText(f"Unable to process {self.inputFile}!\nPlease check input file!")
+            # Without exec() the dialog is created but never displayed
+            error.exec()
+
+    def preview_report(self):
+        """Loads the saved output (CSV) and shows it in the preview pane."""
+        df = pd.read_csv(self.outputFile)
+        self.inputFilePreview.setText(df.to_html())
+
+    def to_clipboard(self):
+        """Copies the saved output (CSV) to the clipboard in an Excel-pasteable form."""
+        df = pd.read_csv(self.outputFile)
+        df.to_clipboard(excel=True)
+    
+
+# Application start-up: the QApplication must exist before any widget is created
+app = QtWidgets.QApplication(sys.argv)
+app.setStyle("Fusion")
+
+# Build and show the main window, then hand control to the Qt event loop
+window = MainWindow()
+window.setWindowTitle("IL Extract")
+window.show()
+app.exec()
--- a/mainWindow_new.py
+++ b/mainWindow_new.py
@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+
+# Form implementation generated from reading ui file 'MonarchReplace2.ui'
+#
+# Created by: PyQt5 UI code generator 5.15.6
+#
+# WARNING: Any manual changes made to this file will be lost when pyuic5 is
+# run again.  Do not edit this file unless you know what you are doing.
+
+
+from PyQt5 import QtCore, QtGui, QtWidgets
+
+
+class Ui_MainWindow(object):
+    """
+    Widget layout for the IL Extract main window.
+    setupUi creates the widgets on the window it is given and retranslateUi
+    fills in the text that is shown to the user.
+    """
+    # Report names shown in the report type combo box, in display order.
+    # setupUi adds one empty entry per name and retranslateUi labels them, so the
+    # number of entries and the number of labels can not drift apart.
+    REPORT_TYPES = (
+        "ACH",
+        "Disposition",
+        "Gain Loss",
+        "Lock Box",
+        "Minv_C",
+        "Net Inv. Loans",
+        "NI Renewal",
+        "NIV After",
+        "PBP Epay",
+        # NOTE(review): the report processing code in this diff compares the
+        # selection against "Returns", not "Returned Check" - confirm which is intended
+        "Returned Check",
+        "Unapplied",
+        "VMCC",
+        "Wires",
+    )
+
+    def setupUi(self, MainWindow):
+        """
+        Create every widget of the main window and attach it to MainWindow.
+        MainWindow is the window object that receives the central widget,
+        the menu bar and the status bar.
+        """
+        MainWindow.setObjectName("MainWindow")
+        MainWindow.resize(1001, 664)
+        MainWindow.setWindowTitle('IL Extract')
+        # Icons are loaded from svg files using relative paths
+        icon = QtGui.QIcon()
+        icon.addPixmap(QtGui.QPixmap("extract.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
+        icon2 = QtGui.QIcon()
+        icon2.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
+        # NOTE(review): iconCopy loads the same folder.svg as icon2 - confirm
+        # whether a dedicated copy icon was intended
+        iconCopy = QtGui.QIcon()
+        iconCopy.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
+        iconProcess = QtGui.QIcon()
+        iconProcess.addPixmap(QtGui.QPixmap("process.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
+        MainWindow.setWindowIcon(icon)
+        self.centralwidget = QtWidgets.QWidget(MainWindow)
+        self.centralwidget.setObjectName("centralwidget")
+        # Preview pane for the report contents
+        self.inputFilePreview = QtWidgets.QTextBrowser(self.centralwidget)
+        self.inputFilePreview.setGeometry(QtCore.QRect(20, 220, 951, 391))
+        self.inputFilePreview.setObjectName("inputFilePreview")
+        # Both action buttons start out disabled
+        self.processReportButton = QtWidgets.QPushButton(self.centralwidget)
+        self.processReportButton.setEnabled(False)
+        self.processReportButton.setGeometry(QtCore.QRect(20, 180, 250, 36))
+        self.processReportButton.setObjectName("processReportButton")
+        self.processReportButton.setIcon(iconProcess)
+        self.openReportButton = QtWidgets.QPushButton(self.centralwidget)
+        self.openReportButton.setEnabled(False)
+        self.openReportButton.setGeometry(QtCore.QRect(280, 180, 241, 36))
+        self.openReportButton.setObjectName("openReportButton")
+        self.openReportButton.setIcon(iconCopy)
+        # Vertical box holding the input file row and the output file row
+        self.layoutWidget = QtWidgets.QWidget(self.centralwidget)
+        self.layoutWidget.setGeometry(QtCore.QRect(21, 90, 951, 84))
+        self.layoutWidget.setObjectName("layoutWidget")
+        self.fileSettingsBox = QtWidgets.QVBoxLayout(self.layoutWidget)
+        self.fileSettingsBox.setContentsMargins(0, 0, 0, 0)
+        self.fileSettingsBox.setObjectName("fileSettingsBox")
+        # Input file row: select button and a read only line edit
+        self.inputFileBox = QtWidgets.QHBoxLayout()
+        self.inputFileBox.setObjectName("inputFileBox")
+        self.inputFileButton = QtWidgets.QPushButton(self.layoutWidget)
+        self.inputFileButton.setMinimumSize(QtCore.QSize(250, 0))
+        self.inputFileButton.setMaximumSize(QtCore.QSize(250, 36))
+        self.inputFileButton.setIcon(icon2)
+        self.inputFileButton.setObjectName("inputFileButton")
+        self.inputFileBox.addWidget(self.inputFileButton)
+        self.inputFileLE = QtWidgets.QLineEdit(self.layoutWidget)
+        self.inputFileLE.setReadOnly(True)
+        self.inputFileLE.setObjectName("inputFileLE")
+        self.inputFileBox.addWidget(self.inputFileLE)
+        self.fileSettingsBox.addLayout(self.inputFileBox)
+        # Output file row: select button and a read only line edit
+        self.outFileLocation = QtWidgets.QHBoxLayout()
+        self.outFileLocation.setObjectName("outFileLocation")
+        self.outputFileButton = QtWidgets.QPushButton(self.layoutWidget)
+        self.outputFileButton.setMinimumSize(QtCore.QSize(250, 0))
+        self.outputFileButton.setMaximumSize(QtCore.QSize(250, 36))
+        self.outputFileButton.setIcon(icon2)
+        self.outputFileButton.setObjectName("outputFileButton")
+        self.outFileLocation.addWidget(self.outputFileButton)
+        self.outputFileLE = QtWidgets.QLineEdit(self.layoutWidget)
+        self.outputFileLE.setReadOnly(True)
+        self.outputFileLE.setObjectName("outputFileLE")
+        self.outFileLocation.addWidget(self.outputFileLE)
+        self.fileSettingsBox.addLayout(self.outFileLocation)
+        # Report type selector
+        self.reportTypeCB = QtWidgets.QComboBox(self.centralwidget)
+        self.reportTypeCB.setGeometry(QtCore.QRect(21, 51, 250, 37))
+        self.reportTypeCB.setObjectName("reportTypeCB")
+        # One empty entry per report type, the text is set in retranslateUi.
+        # Previously only 12 entries were added for 13 labels, so the last
+        # label ("Wires") had no entry to be set on.
+        for _ in self.REPORT_TYPES:
+            self.reportTypeCB.addItem("")
+        self.reportTypeL = QtWidgets.QLabel(self.centralwidget)
+        self.reportTypeL.setGeometry(QtCore.QRect(21, 21, 144, 24))
+        font = QtGui.QFont()
+        font.setPointSize(14)
+        font.setBold(True)
+        font.setWeight(75)
+        self.reportTypeL.setFont(font)
+        self.reportTypeL.setObjectName("reportTypeL")
+        MainWindow.setCentralWidget(self.centralwidget)
+        self.menubar = QtWidgets.QMenuBar(MainWindow)
+        self.menubar.setGeometry(QtCore.QRect(0, 0, 1001, 29))
+        self.menubar.setObjectName("menubar")
+        MainWindow.setMenuBar(self.menubar)
+        self.statusbar = QtWidgets.QStatusBar(MainWindow)
+        self.statusbar.setObjectName("statusbar")
+        MainWindow.setStatusBar(self.statusbar)
+        self.reportTypeL.setBuddy(self.reportTypeCB)
+
+        self.retranslateUi(MainWindow)
+        QtCore.QMetaObject.connectSlotsByName(MainWindow)
+        # Tab order: report type, file buttons, action buttons, line edits, preview
+        MainWindow.setTabOrder(self.reportTypeCB, self.inputFileButton)
+        MainWindow.setTabOrder(self.inputFileButton, self.outputFileButton)
+        MainWindow.setTabOrder(self.outputFileButton, self.processReportButton)
+        MainWindow.setTabOrder(self.processReportButton, self.openReportButton)
+        MainWindow.setTabOrder(self.openReportButton, self.inputFileLE)
+        MainWindow.setTabOrder(self.inputFileLE, self.outputFileLE)
+        MainWindow.setTabOrder(self.outputFileLE, self.inputFilePreview)
+
+    def retranslateUi(self, MainWindow):
+        """
+        Set the user visible text of the window and of its widgets.
+        MainWindow is the window whose title is set.
+        """
+        _translate = QtCore.QCoreApplication.translate
+        # Keep the title chosen in setupUi instead of replacing it with "MainWindow"
+        MainWindow.setWindowTitle(_translate("MainWindow", "IL Extract"))
+        self.processReportButton.setText(_translate("MainWindow", "&Process Report"))
+        self.openReportButton.setText(_translate("MainWindow", "&Copy to Clipboard"))
+        self.inputFileButton.setText(_translate("MainWindow", "Select &InfoLease Report"))
+        self.inputFileLE.setPlaceholderText(_translate("MainWindow", "No file selected"))
+        self.outputFileButton.setText(_translate("MainWindow", "Select &Report Output Location"))
+        self.outputFileLE.setPlaceholderText(_translate("MainWindow", "No location selected"))
+        # Label every combo box entry that setupUi created
+        for index, report_type in enumerate(self.REPORT_TYPES):
+            self.reportTypeCB.setItemText(index, _translate("MainWindow", report_type))
+        self.reportTypeL.setText(_translate("MainWindow", "Infolease Report"))