From df96574a98eb095b38c8303a26c0f6bcde861262 Mon Sep 17 00:00:00 2001 From: Griffiths Lott Date: Mon, 6 Jun 2022 11:13:46 -0400 Subject: [PATCH] cleaned up main, added comments to ILextract --- ILExtract.py | 179 ++++++++++++++++++++------------- RenewalTest.py | 13 ++- ach_special.py | 87 ++++++++++++++++ copy.svg | 60 +++++++++++ extract.ico | Bin 0 -> 7077 bytes extract.svg | 1 + folder.svg | 2 + main.py | 25 +++-- mainWindow.ui | 268 +++++++++++++++++++++++++++++++++++++++++++++++++ process.svg | 1 + 10 files changed, 553 insertions(+), 83 deletions(-) create mode 100644 ach_special.py create mode 100644 copy.svg create mode 100644 extract.ico create mode 100644 extract.svg create mode 100644 folder.svg create mode 100644 mainWindow.ui create mode 100644 process.svg diff --git a/ILExtract.py b/ILExtract.py index 2ff38bc..3eabbf3 100644 --- a/ILExtract.py +++ b/ILExtract.py @@ -5,45 +5,33 @@ import sys, getopt import re from pathlib import Path import time +import numpy as np +# contract numbers are a common feature in many reports to it's +# useful to have the regex for them globally avaiable contract_number_regex = "\d{3}-\d{7}-\d{3}" - class ILReport: """ InfoLease Report class will be used to work with the files. It makes it easier to add new reports to the workflow and to make it more clear where the reports are coming from. It also helps with tracking reports that may not be ready yet. """ - def __init__(self, location, extraction_function = None, output_location = None, output_name = None): + def __init__(self, location, extraction_function, output_location = None): # The location where the InfoLease report is stored self.location = location - # The base name of the file, corresponds to the report type # If output location not specified, save to the input location if output_location == None: self.output_location = Path(location).parent.absolute() else: - self.output_location = output_location - # This is optional but has a default - if output_name == None: - # Get the file name of the input and remove the date - self.output_name = os.path.basename(f"{self.location}")\ - .replace(f"{(dt.now() - timedelta(days=+1)).strftime('%Y.%m.%d')}","") - else: - self.output_name = output_name + self.output_location = output_location # The function used to extract the data from the report self.x_method = extraction_function # Tracks whether the data was successfully exctracted self.successful = False - - def run(self) -> int: - """ - This method is what actully run the report. I uses the specidied extraction function to create and save an excel document. - SUCESS returns 0 - ERROR returns 1 - Failure is also noted by self.success == False - """ + + def process(self): try: # Open the file and read it to a string | errors = 'replace' deals with non UTF-8 characters (no affect on output) with open(self.location, errors="replace") as ifile: @@ -55,48 +43,16 @@ class ILReport: try: # Run the associated method to extract the data and get the dataframe dataframe = self.x_method(report, self.output_location) - try: - assert(len(dataframe) > 1) - except Exception as e: - print(f"Data Length Error: {self.output_name} is empty:\n{dataframe}") - self.successful = False - return 1 except Exception as e: - print(f"{self.output_name} failed to process:\n{e}") - self.successful = False - return 1 - try: - # Save the dataframe as an excel document - dataframe.to_excel(self.output_location, index = False, engine="openpyxl") - except Exception as e: - self.successful = False - print(f"{self.output_location} failed to save to excel!\n{dataframe}\n{e}") - return 1 - self.successful = True - return dataframe - - def process(self): - try: - # Open the file and read it to a string | errors = 'replace' deals with non UTF-8 characters (no affect on output) - with open(self.location, errors="replace") as ifile: - report = ifile.read() - except IOError as ioe: - print(f"Failed to open file: {self.location}\n{ioe}") + print(f"Failed to create dataframe: {self.output_name}\n{e}") self.successful = False return 1 - #try: - # Run the associated method to extract the data and get the dataframe - dataframe = self.x_method(report, self.output_location) try: assert(len(dataframe) > 1) except Exception as e: print(f"Data Length Error: {self.output_name} is empty:\n{dataframe}") self.successful = False return 1 - #except Exception as e: - # print(f"{self.output_name} failed to process:\n{e}") - # self.successful = False - # return 1 return dataframe @@ -117,9 +73,11 @@ def create_line_divider(breakage_list: list): ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_extractor' Will automatically convert numbers to floats """ + # We can't have a slot number higher than the number of slots assert(slot_num < len(breakage_list)+1) low_range = 0 if slot_num == 0 else breakage_list[slot_num-1] high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num] + # In order to create a float we need to remove the , from the string data = line_string[low_range:high_range].strip().replace(",", "") try: data = float(data) except: pass @@ -188,21 +146,36 @@ def ach(report: str, save_name: str): bank_number_regex = "\d{9}" batch_num_regex = "BATCH \d{4} TOTAL" for line in enumerate(lines): + # Check for a contract number and a bank number in the line if (re.search(contract_number_regex, line[1]) != None) & (re.search(bank_number_regex, line[1]) != None): + # Iterates through the columns list and adds the corresponding slot number to the dictonary for the column + # Here the order of the columns (keys in dictonary) matter since they need to be in the same order as + # the slot numbers [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns)-3)] + # This searches for a statement that looks like a batch number + # This sums the contracts by thier lessor code. A feature requested by cash apps if re.search(batch_num_regex, line[1]) != None: + # Batch number is always in characters 96 to 101 batches["batch_num"].append(line[1][96:101]) + # Payment date will be 2 lines below that between charactes 114 and 125 batches["payment_date"].append(lines[line[0]+2][114:125]) + # Lessor is just the first three number sof the contract number batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3]) + # Total is a number given by the report for that batch. ',' is removed so that it can be transformed into a float batches["total"].append(float(line[1][107:125].strip().replace(",", ""))) batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", ""))) + # Any time there's a new batch we need to add this data to the dictionary up up to the currrent place + # So we iterate over the number of contracts and add in the newest value for each that don't have one of these values already [extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))] [extracted_data_dict["Lessor"].append(batches["lessor"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Lessor"])))] [extracted_data_dict["PaymentDate"].append(batches["payment_date"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["PaymentDate"])))] - + # Now the dictioanry lists should all be equal lengths and we can create a dataframe dataframe = pd.DataFrame(extracted_data_dict) + # We're creating two sheets: data & summary so we need to open and excel writer + # This also helps with a bug caused by larger dataframes with pd.ExcelWriter(save_name) as writer: dataframe.to_excel(writer, index=False, sheet_name="data") + # The batches dictioanry is converted to a dataframe and added as it's own sheet pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary") return dataframe @@ -226,6 +199,7 @@ def disposition(report: str, save_name: str): for line in enumerate(lines): if re.search(contract_number_regex, data_extractor(0,line[1])): [extracted_data_dict[columns[c]].append(data_extractor(c,line[1])) for c in range(0, len(columns)-1)] + # Customer name is on a seperate line so we need to grab that seperately extracted_data_dict["Customer Name"].append(lines[line[0]+1].strip()) dataframe = pd.DataFrame(extracted_data_dict) dataframe.to_excel(save_name, index=False) @@ -284,8 +258,11 @@ def gainloss(report: str, save_name: str): # L5: CTD OPER columns = list(extracted_data_dict.keys()) # These line data are used to tell the data extrator which values to pull for each line of - # relevant data. It parits dictionary keys with thier corresponding data slot in the line - # So that they can be iterated through during data extraction + # relevant data. It pairs dictionary keys with thier corresponding data slot in the line + # so that they can be iterated through during data extraction + # + # It looks confusing but makes more sense if you look at the actual Info Lease reports + # This is one of the messiest reports line0 = list(zip(columns[0:7],[i for i in range(1,8)])) line1 = list(zip(columns[7:15],[i for i in range(0,8)])) line2 = list(zip(columns[15:23], [i for i in range(0,8)])) @@ -294,6 +271,7 @@ def gainloss(report: str, save_name: str): line5 = list(zip(columns[36:], [i for i in range(1,4)])) data_extractor = create_line_divider([27,43,58,74,88,105,120]) for line in enumerate(lines): + # The line must contain a contract number and the first data slot should be a float if (re.search(contract_number_regex, data_extractor(0,line[1])) != None)&\ (type(data_extractor(1,line[1])) == float) : data_section = lines[line[0]-1:line[0]+5] @@ -345,25 +323,74 @@ def net_invest_trial_balance(report: str, save_name: str): 'UNPAID INT' : [], 'NET INV' : [], 'UNEARNED IDC' : [], + "LESSOR": [] } + lessors = [] columns = list(extracted_data_dict.keys()) line0 = list(zip(columns[0:4], [0,3,4,5])) line1 = list(zip(columns[4:12], [i for i in range(0,8)])) line2 = list(zip(columns[12:19], [i for i in range(0,7)])) - line3 = list(zip(columns[19:], [i for i in range(1,6)])) + line3 = list(zip(columns[19:-1], [i for i in range(1,6)])) + + for l in [line0,line1,line2,line3]: + print(f"\n{l}") - data_extractor = create_line_divider([18,35,53,67,87,106,117]) + data_extractor = create_line_divider([18,32,50,66,84,100,117]) for line in enumerate(lines): slot1 = data_extractor(0,line[1],False) if type(slot1) != str : continue if re.search(contract_number_regex, slot1) != None: - data_section = lines[line[0]-1:line[0]+4] - [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0] - [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1] - [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2] - [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[3])) for c in line3] + data_section = lines[line[0]-1:line[0]+3] + # There were issues were the IL Report would have random blank lines so that needs to be checked + # and adjusted for + # A dead give away of an empty line in a data section is a line without a '.' + # Check the first data line + if data_section[0].find(".") == -1: + # Move it back if empty + data_section[0] = lines[line[0]-2] + # Now we go through each relevant data line and make sure they're not blank + for ds in enumerate(data_section): + if ds[1].find(".") == -1: + if ds[0] < len(data_section) -1: + for i in range(ds[0], len(data_section)-1): + # This allows us to move down all the data lines after a blank data line + data_section[i] = data_section[i+1] + # This handles the last data line which goes 'out-of-bounds' of the existing data selection + data_section[3] = lines[line[0]+3] + else: + data_section[3] = lines[line[0]+3] + # Now that the datasection is sorted we can extract the data + [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0], False)) for c in line0] + [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1], False)) for c in line1] + [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2], False)) for c in line2] + [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[3], False)) for c in line3] + extracted_data_dict["LESSOR"].append(extracted_data_dict["LEASE NUMBER"][-1][0:3]) + # We keep track of when we see new lessors for a summary tab + if extracted_data_dict["LESSOR"][-1] not in lessors: + lessors.append(extracted_data_dict["LESSOR"][-1]) + dataframe = pd.DataFrame(extracted_data_dict) - dataframe.to_excel(save_name, index=False) + + summary_series = [] + for lessor in lessors: + reduced_df = dataframe.loc[dataframe["LESSOR"] == lessor] + # Delete columns that are strings as we don't need to sum them + del reduced_df["CUSTOMER NAME"] + del reduced_df["LEASE NUMBER"] + del reduced_df["CONTRACT STAT"] + reduced_df = reduced_df.replace("", np.NaN) + # There can sometimes be REVOLVING ACCOUNT over part of the data + # Just get rid of it + reduced_df = reduced_df.replace("REVOLV", np.NaN) + reduced_df = reduced_df.replace("ING ACCOUNT", np.NaN) + summation = reduced_df.sum(skipna=True, axis=0) + summation["LESSOR"] = lessor + summation["CONTRACT COUNT"] = len(reduced_df.index) + summary_series.append(summation) + summary_df = pd.concat(summary_series, axis=1).transpose().set_index("LESSOR") + with pd.ExcelWriter(save_name) as writer: + dataframe.to_excel(writer, index=False, sheet_name="data") + pd.DataFrame(summary_df).to_excel(writer, index=True, sheet_name="Summary") return dataframe @@ -493,21 +520,21 @@ def renewal_net_invest_trial_balance(report: str, save_name: str): 'CUSTOMER NAME' : [], 'TYPE' : [], 'GROSS RENEWAL' : [], - 'CUR RENT RCVB' : [], - 'UNEARNED RIN' : [], + 'REMAINING BAL' : [], + 'FINANCED RES' : [], 'REMAINING RES' : [], 'LEASE PYMTS' : [], 'CONTRACT NUMBER' : [], 'RENEWAL' : [], 'PAYMENTS RCVD' : [], - 'REM RENT RCVB' : [], - 'UNPAID RES' : [], + 'CUR RENT RCVB' : [], + 'UNEARNED RIN' : [], 'SECURITY DEP' : [], 'NET INVEST' : [], 'UNEARN INCOME' : [], 'TOTAL' : [], - 'REMAINING BAL' : [], - 'FINANCED RES' : [], + 'REM RENT RCVB' : [], + 'UNPAID RES' : [], } columns = list(extracted_data_dict.keys()) line0 = list(zip(columns[0:7], [0,1,2,3,4,5,7])) @@ -518,7 +545,19 @@ def renewal_net_invest_trial_balance(report: str, save_name: str): slot1 = data_extractor(0,line[1],False) if type(slot1) != str : continue if re.search(contract_number_regex, slot1) != None: - data_section = lines[line[0]-1:line[0]+4] + data_section = lines[line[0]-1:line[0]+2] + # SEE net_invest_trial_balance FOR EXPLAINATION + if data_section[0].find(".") == -1: + data_section[0] = lines[line[0]-2] + for ds in enumerate(data_section): + if ds[1].find(".") == -1: + if ds[0] < len(data_section) -1: + for i in range(ds[0], len(data_section)-1): + data_section[i] = data_section[i+1] + data_section[2] = lines[line[0]+2] + else: + data_section[2] = lines[line[0]+2] + [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2] diff --git a/RenewalTest.py b/RenewalTest.py index 96b4335..2ec11a4 100644 --- a/RenewalTest.py +++ b/RenewalTest.py @@ -72,11 +72,16 @@ def renewal_net_invest_trial_balance(report: str, save_name: str): if re.search(contract_number_regex, slot1) != None: data_section = lines[line[0]-1:line[0]+2] + if data_section[0].find(".") == -1: + data_section[0] = lines[line[0]-2] for ds in enumerate(data_section): - print(ds[1]) if ds[1].find(".") == -1: - [print(f"\n{d[0]}: {d[1]}") for d in enumerate(data_section)] - print('\n') + if ds[0] < len(data_section) -1: + for i in range(ds[0], len(data_section)-1): + data_section[i] = data_section[i+1] + data_section[2] = lines[line[0]+2] + else: + data_section[2] = lines[line[0]+2] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0] [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1] @@ -89,4 +94,4 @@ def renewal_net_invest_trial_balance(report: str, save_name: str): with open("/config/workspace/LEAF/IL Extract SRC/2022.05.20 Renewal Net Investment", errors="replace") as rep_file: report = rep_file.read() -prt(renewal_net_invest_trial_balance(report, "rn_TESTING.xlsx")) \ No newline at end of file +prt(renewal_net_invest_trial_balance(report, "RN_TEST_0606.xlsx")) \ No newline at end of file diff --git a/ach_special.py b/ach_special.py new file mode 100644 index 0000000..6550247 --- /dev/null +++ b/ach_special.py @@ -0,0 +1,87 @@ +import os +import pandas as pd +from datetime import datetime as dt, timedelta +import sys, getopt +import re +from pathlib import Path +import time +from pprint import pprint as prt + +contract_number_regex = "\d{3}-\d{7}-\d{3}" + +def create_line_divider(breakage_list: list): + """ + This allows for the creation of a custom data extractor + Breakage list defines the split points that will be used for the line + Example + Given breakage_list [10, 20, 30] + using slot_num 0 in the resulting extract_line_slot will yield + characters 0 - 10 from the string. + Slot 1 would give characters 10 - 20 + """ + def extract_line_slot(slot_num : int, line_string: str, debug : bool = False): + """ + Pulls data from a line/string using break points defined by the + parent function. + ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_extractor' + Will automatically convert numbers to floats + """ + assert(slot_num < len(breakage_list)+1) + low_range = 0 if slot_num == 0 else breakage_list[slot_num-1] + high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num] + data = line_string[low_range:high_range].strip().replace(",", "") + try: data = float(data) + except: pass + if debug: + print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}") + return data + return extract_line_slot + + +def ach(report: str, save_name: str): + + lines = report.splitlines() + extracted_data_dict = { + "ContractNumber" : [], + "CustomerName" : [], + "BankCode" : [], + "BankNumber": [], + "AccountNumber" : [], + "Payment" : [], + "Batch": [], + "Lessor": [], + "PaymentDate": [], + } + columns = list(extracted_data_dict.keys()) + batches = { + "batch_num": [], + "payment_date": [], + "lessor": [], + "count": [], + "total": [] + } + + data_extractor = create_line_divider([19,57,67,82,104]) + bank_number_regex = "\d{9}" + batch_num_regex = "BATCH \d{4} TOTAL" + for line in enumerate(lines): + if (re.search(contract_number_regex, line[1]) != None) & (re.search(bank_number_regex, line[1]) != None): + [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns)-3)] + if re.search(batch_num_regex, line[1]) != None: + batches["batch_num"].append(line[1][96:101]) + batches["payment_date"].append(lines[line[0]+2][114:125]) + batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3]) + batches["total"].append(float(line[1][107:125].strip().replace(",", ""))) + batches["count"].append(float(lines[line[0]+6][107:125].strip().replace(",", ""))) + [extracted_data_dict["Batch"].append(batches["batch_num"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])))] + [extracted_data_dict["Lessor"].append(batches["lessor"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Lessor"])))] + [extracted_data_dict["PaymentDate"].append(batches["payment_date"][-1]) for _ in range(0, (len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["PaymentDate"])))] + + dataframe = pd.DataFrame(extracted_data_dict) + + return dataframe + +with open("/config/workspace/LEAF/IL Extract SRC/2022.05.04_ACH_C") as rep_file: + report = rep_file.read() + +prt(ach(report, "ACH_TESTING.xlsx")) \ No newline at end of file diff --git a/copy.svg b/copy.svg new file mode 100644 index 0000000..1012fc1 --- /dev/null +++ b/copy.svg @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/extract.ico b/extract.ico new file mode 100644 index 0000000000000000000000000000000000000000..faab094f52e1df2a7ae8ba76422413aba6c26f88 GIT binary patch literal 7077 zcmZu$cUTim6TeFWgqlJp^dgE#5u_*xB2}u=r57m{iWH-?0HIew5T#0uNJj(&Ap+6_ zDS{w12!c|BfCOn@-v7V5=kE5onVsF8nP>Jlvj6~6@Bc0k5Cl>%0Kh?Q$C()EFf;Hl zP@Bwpx|+BCqyG26X{jIEhXgkOz*y*MUNgf^ZR9@Ax^pLFMXlY>%UoJe8+xrrOiqyC ze~k|8k&G02+4DlYZzuNa+hN&s{ARRJqO6=3w>+(j&qqfMb+fb9=kp- zVs7unoKBKWPcBH$t$Yn#B`Q41QxG{XUE}{-_)@6^Bk|9@wT0wA-&Nz&<=z%n-(r_h zQHj4|MTlzzIYH@sx3fFu#yjUk>u=#ahkhO&v=|@uVeH+Bazt8Eez-SVp&*J-4z%v9 z>Z#P4B%OhVOE+aNnNat-QH4IKCS%m;?B{a zAJT_~EKKSoepy(!8K*uDE3g;>ScJ|V+%yag=1x_rka1B7T@(=V$O2pGo7j|Y4f=j7 zCbop<5_Q7+TDC&-TvAj`#C9N}PiKdGi5Rzo@2pJrsP^rYQPOZLs)-5v;pLCkIAS*V zt@03eqc{RW)^=QwRBHiZr?#eRoM82@h5CJ?wh;vGFBm%T{J@Nn(gGwy+Z;xE@W_Ds zQ^K;(3%1xmddK5`zC<5IoC`f`Xb*w@%N3oLX?=4hb~Nt(MDek0pkJ}3e3|SlMMvkb zQvxgV?=#N9bmty@HLZQAGa(gdU9yf>72X|MxXAH&NB*LU%9qg+&N}Qi7wl13D(NKQ zn~94TUmUDpV<~kh9rmn@eARz}v=>>lY^maTDfvRIvr|M1UOk(5Iw#z6l(%}Jh5n@gzy zQ2kLE!3Y|-sHcm`i6$QG%bd*L?9=o-szd>Fx9sU`M~k8PETZ_$||lU zu%vJA%S(&EDsq`)(BDsxRDw+Q%khkD%i{jq*o}+Ad-o5_>D4birV;Yrw=rW=y#V;# z0f{Los#*{e*<|hdW9QA?>naRlV3sm-%N0$URSAo`OGcS`0Nx{GyGIf@=Q`pqh6aSD z?)zEik6;5^)8B@O4F9>e+LGqHVf5lSK+5&!qjl|CT>G*-P5f_^^IsrMe#e8Ypb}!j zvwX!UG%efv>QV##()iWD<_FUGoj*-Ah6h6<+~vUgP=29(_Zsr?JqwSWB3MB^1o@SA z=H0BG=MhVY;(Ty#3oZqzpe>*LefgoQ zOaEeJ3nB$!@%U(IU@Te3MY%(gV^M>K5+peJO4rGS$k*?TAZ~f>5Q<|$Vp!M_!zZ2- z&k2GbGM58~opNlk1UY0IVFi8*=1ry(Y`9=Vy9>#E;4gaC1j?Lb zsrIB7h|QGY*n%0Sib`gs2E-a+a$`)KKZpW*1y5LsmY(ezDZnF$=xsV=;V~q2^Wwo8 zuKMkPvNzBM^%)niynK*~9B0G5VmIZ^&Zj-0#Woo_9GjDPJMw@Lj7O=s0tk?&#w=zU zhz^ixo1=^rNEq``2d+|j!orce{Z|&ibfK~P?rmsrZInYJD+S4+a5|=U@K^8QX%v+R zI7x>pF+T(I3j%h))5PtPI<+R>s6>a@WJZAaQY~jHX-AYCN}79o5?>95MVj^ugMvcN zXaLZ^C8-CtBa}F?Zh2B6YEGdGvNGRt&Ma}t}%M2F4d`>+4ZI?+I_z~xPWYU`Wl zU1$=aVT`T(l*lvE>CpU7O~oo6uYALdF{iC?;#Byl(OMeQceA=-(Q0TnX0y3@BZ^(i zfJo~y3+B+N3@?eb>1FL=(zg?u8K~TMqI`K6@1-Mu7)XlxXC3>sHp+`zTiJSpsmppv zDtgveY7zycm76PR*po`E($ZU;ywqTGbhh-ZSXzM)oJ<_0BGF;M9O$=imf2wt)Ml$4zQ1nkZy!o4HoVedT@57|-+f`}}`KDEsoF~~~${(vBCr30_ z6=-uJcS4MXHcfmA86fH&+Vo#eX|Hx2^;9dprFgn3`!Lop9KC4J&-6W7{#%Slk&vXA z)Ve-*R3<+n?%R_@6aj~G8==3<9_+E#=R>)!F1Ke|Dm|@HE3x}{k4cG}k{-D4H1d;% zOX3kjhX^q!Jb!&Lf9?icS^=HtP}M$Z2zz;EmR;`eKD<_RdhnE}{5Dq@Su&1Y<)Yri4>!YCCICO7b%n$ID|Tx2}ULF{8BPL>6)5-u9%8?$++>j+>A%V->bja|)jutYpP z@%sjKuexg&T%xLS7#svXxdAk6gBDzOilW_?6NjeN^9eHH>t}nZ!)i=itDr} zsk_}xD=GWLq+9y0%!xHU@mfmaqEN!vb-wrehQ#i`P^U5I%9_zy%E4dF8R231PVd*g zlOI@QU^%ps4;EFoU{V3@I-}iQ)_vEaU4wi`<9OFdBFQ}6w>ki+4Sw;b_DCYb~PHG zqtd_J?XZG_XtDB-Id0pSM6xHTh?vPSyW&SYWU`^FAwan!sS>8M(&b1p$4<85Q$sFDBX39Z(Uvk`cr*wee3M&{Sk3w!#A* z3-;vjQbfr=AHVN0#M`nP2aZz2fI=G@$GN~im$+6;@lcKI6>MJ_d|_)-U(%&#hc57K z&3}`paSV;Q>qWYR%SWRJRa8HpHw40ROMX`ZFUoRucg~9e8Uyvfz?%KZq9P(ZO7Geu zFVQY~u6utj29P93{ql?xmtRFzwZqi?sa%Jy8{63(Od(1VM0TRuuq`k>4S!mt-LZGs z`v?t;R6>T+z6LXEU%58fD)+U*!X|tOGR>zXgGMZ#`4qkp{?5)Zk&b zMFqsQi{rd)w88PthV9|G`1J{Q(+Zxhi<{n>;k%;1toGbp@N7~uTCC-^6=*T*tLh&al;^n_=~#ln+APw zxJ%1+d2dA)+k08buMZ6Jov(P8r{61u0hk(RRewuH)s9?lXrr-M+j%Yb{7_1(?5fk) zTAYou!Tqp!*Ad}?#7}f%HPLG}vu=B-NspTLGQzr9t5w!wS$eNt1qlci{6!g=6(5o&iA!U2=w%fkW38SzMh}C;8~~p;F~!~VLGw*@bJrbW52ASZ_0Q7 zUg6sbMs!M?T(4|<@HrvBZINRq?stI7hlls2A8bUO4DRmtg;q7R2WDS7N`zu?TR1h7 z(2xh_L7D{@r~pE37}1)JL~b{9O(ZoCp7xjq%%#2gvedtEp0lyv@PRU!mBdQii#q#O z3wGNJm;Rj3?)}m+-sHv>9fx^kG~NU1Lgd7?LjoO5Y`)({c{H23?VYm>pb`2QLERHS z)FZKqHE25h+gpN-WeI+_LLh%dBi)2=#g6+vUzOQz$Pc_IzY*4@0_M-Yodz7 z%&N3!&yzlajjM=*-YLfPOjo&30Od9@y| zfiVli+>i52w&hRQowaTAf*!~zHDq*-v*SE%Z&AHEG-^`Z5}I#VVRVH9G8Mv$MUlmc z4uDR1RaBNUqm1q^(fn{JWtLV>6rjXQ@y4qT(Xs9Op7lZBun>NqPER@ojoCM5>A@at zTt37hW=i!AQ=PmGF1NWV2<9j)3@CZUy>MsgizBu}5>SPQSQaeCRV|{eYp~CBr+8CK zSeC)C=A9NaLSg@15<)?_3AO`+N|y?k#{5JVPWN6PyE4SAdr|yCp@G<&t>3-O0Ryv} zHVx;|cNe(LBvi7H^+d0S!1k4ygedz9x@4tBGL5Wr9(^Zi1=X z30~sw6{J99VaSFJL=c5km4cuFDuB94jdJTTiK<@%8Al8uNV~&K06xP1r!Yw$pbCQs zf>~db?#RQ0-ltqfMRz9X0fFekBKSyBBGd(q3#rdgaMeH+UO&na{+17O^j2ELV?8ze zMG$Tw4Np_Wbs#c>6lP;px;*nMD7*gj->!Vdi}8;!QNU72fTSY)rfG(vy1$4qP>ew6 z1eo50tIrQ1E;w8T)=g|scHK&3L0m3oB1uaK2;6M!WL$A4K55__xue2+uO-Tf7E(^xMOH>ss| z&Q>1c=>C9+>pFgw#b^+Fra9aBXnvymnL`FEfKwUj8|0-hpuDo#3iWuKn7L|oQ<}BJ zfQBhE`{8O4X?3&w7@SSG$D*B%7!dex>d((WjZ#OU@&U!O%k`t<+s&sVR0#gkXm+$6 zVlx)4sr8+~mc><8rzOD%ST|NAsZX6Ag%BI=>J%2vKq_c-5tna3hM&QK?_J@5rOEwQ zY=s&sEWxPWUS^xCqM%3gRJkFn@!Y{VpQNdD0+(f$FQ@u@to$k_Nk!R%h5VSo@G356829i-$u$7s~i+e=6==^pNCtP=f=SwBPk&ydxG{BVq^%EpL;dS9E^@) z;oO#G$+6@Kds&vzy#0%&;9Z^Bt(Y1~?CYuQ*u(_c_V>Xq?p^GE?}cfq!Op%$usoA? zxIXDxtGQvMNlXb(r@t8{vH0(K7nMk;oPsSQg=urI>xv3%ia+M3$dGHw?3EX*p8){L zl;txYKIEH*e;Ov-8@yw-)_=3P#fyu9*T5?9@tdhvzko+z8DQf>(0Vep6-CITBYL@1 zl;QkF;;xN^zC*bkH#S|reQlz>4g#oqm8?A!cVbegQaCr&E5_9bioSLl-#vMA9%ztL zP-y!Q=e(bRd_c-3e!?md((>ABM-iq@8&eOil4MA<9bVUjZh!hfeCWt(=H2J*ZRQ6z zNv)cJ1;o@a??mGLCN-wLYW9hAkTQ$sT zMWaSzB*Odm_l?CEDQ<{-+LY_TLpie7TT>JHAs1rMjm1X^id1Ce$n)-xIP=yKhWH)d zL`}T$?&15lJNWKgO=-)>76Z(Mo9;zBHThj3uiySAxel9Mr&28Fmq#y9>7p)3gpd8wBHO{4puvr;$Djur7ztIyvj-t+O#Y6H zuZwp5X~F&RF-Bm*j2YF7;J`8QM#sekp}O=cS4J`_@y+(!?bUQ(KpiHjsIuJ%<%Cpc ze(+}>LY)bY>38U*KvmT?FJL0XlNTG2D&dFY^6#Umdc_1rCWaZmg;}IBv&c))9i1W5 zJPWH?RbdJy?yNu2zePpkj;CXEbRC6`XEPSznCLk|+G|lYV2&=--TKwV6V;JXOQZM$)vLgTso{IT0A}IcF+h9>zv*=Htj2TcafZ=uA`pqEZ;zQ_ z+6i8DB}}q1gMUAE{p?9y`s53iMf)<4&yma}a?dskP}+%>XIb`&^BmtO5?n=r`ZA|# zfT$M*&nZZYi+9qXG;U?Kr+!wGlZRnJYGh$A(iL2aP~&dm%@v9bXQBk(6rUe=b=c{J zW(nbI>Dh^2H1zok`l@ZqgR7cA!z)Gw8p(ajXMm_=ynU1NED`F{zAw zcB8aX1;|M_9Jhjhe`8N22BP4fR&*dM$V{e7#dlRtAvuw9w(so!%UI-u9hI>VCtV*m zAy|9ys3*NK+GyJjyx-8}vtOk@E(Q_`kwBbN^G*<+ZPfc#-@y5K@aiVFLqko`^S9EU zPOw&DBH%BTuU}BDFOd;PDe%1TEY%^Th3(brz2Bg{Kc53#(vWjbo~x0>ZHZj#ZB>$~ zUfO)#N?c_65`7={ODcorz}uwj)es}PIr>)n5;+lmXce{4!ha#`HFfSL+-KUA)PF=T zI#HdNRY+&y6vkpgzmbtkvBZ6dqP?y2H+D75zpEUeD7!cN9BCV%x~IND35;l zS#M5LiTH;l;?c0a0ni9ti9ANmgpAch&9Txeivg+zAwGI8wbExQIC2b?oUGRV^yG_b zt{(6%Om4COGrA5rE}zHO^noYZ@IVX)FHJii)|UL z5cumRN(LdG0MP*iL?khRtc~cj>idu^-a*0zgWu^b(jYPj3zZhD2(SK?)^ms^a}vLP zN+Y=KiviA;+m_ktHuf{U9l#1OwCG1lfVdCCCxyAOk7LxqyV^X-v_|5t(p9Ud#l%$C zpAsh30_QKc^K&!DABB+O7}WYj*ku11%qGIr2NI)I_NLE0oUnYx_em~wb)XHVE^o*( zc0Tx&74L*g1!ApJQ3_woGA^f{^RJ`wC+FPVFW^qSM-fn*ZQFa>uXHD~<8E5^LvdZ8 zD+gB4Py4I|Cip}i7Hb2PuY+cGL~8n3w8X9`NcA}N-5OLMlq!>-0}uj{^$q`|veeVD z)M8+wt}dV{ExeruQ1MnQ_XAjbsz!$0&VPtW;@(l+31tK+_)ui@LxP1cLY;O@!XI4& z>4m1k-^N$`*r#gBnZ@i_G7_d9t87B`x|C%=xa-kzb$?H|xG1nKf)MoLBh-P0O+ix3 z)X~2)s%NO;Vg?89oT35QpmH8I|MNZ=WLiE0JibyA18|&8*ilQ zV_hav<#Wwat?s7Bv(#TZ+Z`kUfFBZy_s#zpdnonXL6xeK0dR<~9;OjbuAjCtF2Yx| zUBQ*%l8A`#d}s=KG{f6{YPbtD&Y4=$SRV)|Co;T(+C04_zzZmGHGx<^#y00-8A?)A z_~{igABi*SsrRFLzU3h0Z~0=5MjMGq<%IbER-XsC%0R74JxT6v%ayU1XPBW2=A5Ke zd3(v_u(~)Xv^U;P<>7B?C1Fs>DB>w*{-XOOA5ex25%?VLOVZOAYGbxC_F2rQ<@dio zP)f6=EE?4nA(vHQRz0Wh0Y}POb5V`%iGW%_#Uc7Qnos%s(9H`>M>hDr=AQ7F0fM5@ z7sKaSQ{{sau2kc`_(uZbl$$Y+y+q?q(LSO+f=+M9?a<^8BXf`lq|L8~)AP8W0of zGjw#i_fMZH3`QQ8y=geM_|NSdGJUIPAtxsRd=Z>Zxv;lfrMw(R`juHO@H^;&` literal 0 HcmV?d00001 diff --git a/extract.svg b/extract.svg new file mode 100644 index 0000000..c392432 --- /dev/null +++ b/extract.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/folder.svg b/folder.svg new file mode 100644 index 0000000..5488499 --- /dev/null +++ b/folder.svg @@ -0,0 +1,2 @@ + + diff --git a/main.py b/main.py index a0577f2..bd84bd8 100644 --- a/main.py +++ b/main.py @@ -28,16 +28,20 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file') self.inputFileLE.setText(inFile[0]) if inFile[0] == '' : return '' - print(f"Input File: {inFile}") + # Replacing errors fixes some UTF-8 character issues with open(inFile[0], errors="replace") as inF: txt = inF.read() - #print(txt) self.inputFilePreview.setText(txt) self.inputFile = inFile[0] + # This gets the actual file name inFileEnd = inFile[0].split('/')[-1] + # Takes just the root of the input file outputRoot = self.inputFile.removesuffix(inFileEnd) + # Automatically sets output to be in the same file as input, with a naming scheme + # The report type selected in the combo box will dictate the naming self.outputFile = f"{outputRoot}{self.reportTypeCB.currentText()}_{dt.now().strftime('%Y%m%d_%H%M')}.xlsx" self.outputFileLE.setText(self.outputFile) + # Check to make sure the user has selected the correct report type if self.reportTypeCB.currentText().split(" ")[-1].lower() not in self.inputFile.lower(): print("Possibly wrong file type") warning = QtWidgets.QMessageBox() @@ -45,9 +49,11 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): warning.setText(f"Selected report type is {self.reportTypeCB.currentText()} but input file did not contain '{self.reportTypeCB.currentText().split(' ')[-1].lower()}'!\n\ Make sure you select the correct report type before processing!") s = warning.exec() + # Enables the process button self.check_ready_to_process() def setOutput(self): + # This allows the user to change the automatic naming and location outFile = QtWidgets.QFileDialog.getSaveFileName(self, "Output file name") if outFile[0] == '': return '' self.outputFileLE.setText(f"{outFile[0]}__{dt.now().strftime('%Y%m%d_%H_%M')}.xlsx") @@ -56,15 +62,17 @@ Make sure you select the correct report type before processing!") self.check_ready_to_process() def check_ready_to_process(self): + # Makes sure there is an input and output selected before allowing processing self.rtp = True if ((self.inputFile != "") & (self.outputFile != "")) else False if self.rtp : self.processReportButton.setEnabled(True) def process_selection(self): self.inputFilePreview.setText("Processing file...") - with open(self.inputFile, errors="replace") as inF: - reportString = inF.read() + # If only this was python 3.10 and we could use switch statments + # but this should get the job done try: + # Here we set the extraction function that will be used on the report if self.reportTypeCB.currentText() == "ACH": extract_function = ilx.ach elif self.reportTypeCB.currentText() == "Disposition": @@ -91,18 +99,17 @@ Make sure you select the correct report type before processing!") extract_function = ilx.payment_transactions elif self.reportTypeCB.currentText() == "Returned Check": extract_function = ilx.payment_transactions - + # This is where the actual processing happens + # We create an ILReport object and pass in the nessecary information dataframe = ilx.ILReport( location= self.inputFile, extraction_function=extract_function, output_location=self.outputFile, ).process() - - #dataframe.to_excel("test_name.xlsx", index=False, engine="xlsxwriter") + # The text preview box can have trouble loading the larger dataframes so + # they are trimmed to 500 so that the users can see if anything got messed up smallDF = dataframe.iloc[0:500,:] self.inputFilePreview.setText(smallDF.to_html(index=False)) - print("Fin") - self.openReportButton.setEnabled(True) except: error = QtWidgets.QMessageBox() error.setWindowTitle('Error Processing File!') diff --git a/mainWindow.ui b/mainWindow.ui new file mode 100644 index 0000000..3fad271 --- /dev/null +++ b/mainWindow.ui @@ -0,0 +1,268 @@ + + + MainWindow + + + + 0 + 0 + 1001 + 664 + + + + MainWindow + + + + + + 20 + 220 + 951 + 391 + + + + + + false + + + + 20 + 180 + 250 + 36 + + + + &Process Report + + + + + false + + + + 280 + 180 + 241 + 36 + + + + &Copy to Clipboard + + + + + + 21 + 90 + 951 + 84 + + + + + + + + + + 250 + 0 + + + + + 250 + 36 + + + + Select &InfoLease Report + + + + ../../.designer/backup/Pictures/svgs/folder.svg../../.designer/backup/Pictures/svgs/folder.svg + + + + + + + true + + + No file selected + + + + + + + + + + + + 250 + 0 + + + + + 250 + 36 + + + + Select &Report Output Location + + + + ../../.designer/backup/Pictures/svgs/folder.svg../../.designer/backup/Pictures/svgs/folder.svg + + + + + + + true + + + No location selected + + + + + + + + + + + 21 + 51 + 250 + 37 + + + + + ACH + + + + + Disposition + + + + + Gain Loss + + + + + Lock Box + + + + + Minv_C + + + + + Net Inv. Loans + + + + + NI Renewal + + + + + NIV After + + + + + PBP / Epay + + + + + Returned Check + + + + + Unapplied + + + + + VMCC + + + + + Wires + + + + + + + 21 + 21 + 144 + 24 + + + + + 14 + 75 + true + + + + Infolease Report + + + reportTypeCB + + + + + + + 0 + 0 + 1001 + 29 + + + + + + + reportTypeCB + inputFileButton + outputFileButton + processReportButton + openReportButton + inputFileLE + outputFileLE + inputFilePreview + + + + diff --git a/process.svg b/process.svg new file mode 100644 index 0000000..e20b6ba --- /dev/null +++ b/process.svg @@ -0,0 +1 @@ + \ No newline at end of file