commit
87667c0fa3
@ -0,0 +1,525 @@ |
|||||||
|
import os |
||||||
|
import pandas as pd |
||||||
|
from datetime import datetime as dt, timedelta |
||||||
|
import sys, getopt |
||||||
|
import re |
||||||
|
from pathlib import Path |
||||||
|
import time |
||||||
|
|
||||||
|
# Pattern for a contract number in the form ddd-ddddddd-ddd.
# Written as a raw string so the regex escapes are not parsed as (invalid)
# string escape sequences.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
||||||
|
|
||||||
|
|
||||||
|
class ILReport:
    """
    InfoLease Report class used to work with the report files.

    It pairs the location of a report with the function that extracts its
    data, which makes it easier to add new reports to the workflow and to see
    where each report comes from. The ``successful`` flag records whether the
    last ``run``/``process`` call managed to extract (and, for ``run``, save)
    the data.
    """

    def __init__(self, location, extraction_function = None, output_location = None, output_name = None):
        """
        location: path of the InfoLease report to read.
        extraction_function: callable taking the report text and returning a dataframe.
        output_location: directory used by ``run``; defaults to the input file's directory.
        output_name: base name of the saved file; defaults to the input file name
            with yesterday's date (formatted YYYY.MM.DD) removed.
        """
        # The location where the InfoLease report is stored
        self.location = location
        # If output location not specified, save to the input location
        if output_location is None:
            self.output_location = Path(location).parent.absolute()
        else:
            self.output_location = output_location
        # This is optional but has a default
        if output_name is None:
            # Get the file name of the input and remove the date
            yesterday = (dt.now() - timedelta(days=1)).strftime('%Y.%m.%d')
            self.output_name = os.path.basename(f"{self.location}").replace(yesterday, "")
        else:
            self.output_name = output_name
        # The function used to extract the data from the report
        self.x_method = extraction_function
        # Tracks whether the data was successfully extracted
        self.successful = False

    def _extract(self):
        """
        Read the report file and run the extraction function on its text.

        Returns the extracted dataframe, or None (after printing the reason)
        when the file cannot be opened, the extraction function raises, or the
        result does not contain more than one row.
        """
        try:
            # errors="replace" deals with non UTF-8 characters (no effect on output)
            with open(self.location, errors="replace") as ifile:
                report = ifile.read()
        except IOError as ioe:
            print(f"Failed to open file: {self.location}\n{ioe}")
            return None
        try:
            # Run the associated method to extract the data and get the dataframe
            dataframe = self.x_method(report)
        except Exception as e:
            print(f"{self.output_name} failed to process:\n{e}")
            return None
        # Explicit check instead of `assert`, which is removed under `python -O`.
        # A result without a length (e.g. None) is treated like an empty one.
        try:
            has_rows = len(dataframe) > 1
        except TypeError:
            has_rows = False
        if not has_rows:
            print(f"Data Length Error: {self.output_name} is empty:\n{dataframe}")
            return None
        return dataframe

    def run(self) -> int:
        """
        Extract the report with the configured extraction function and save
        the result as an Excel document in ``output_location``.

        SUCCESS returns 0
        ERROR returns 1
        Failure is also noted by self.successful == False
        """
        dataframe = self._extract()
        if dataframe is None:
            self.successful = False
            return 1
        try:
            # Save the dataframe as an excel document
            dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False)
        except Exception as e:
            self.successful = False
            print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}")
            return 1
        self.successful = True
        return 0

    def process(self):
        """
        Extract the report and return the resulting dataframe without saving it.

        Returns the dataframe on success and the integer 1 on failure (kept
        for backward compatibility with existing callers). ``self.successful``
        is updated to reflect the outcome.
        """
        dataframe = self._extract()
        if dataframe is None:
            self.successful = False
            return 1
        self.successful = True
        return dataframe
||||||
|
|
||||||
|
|
||||||
|
def create_line_divider(breakage_list: list):
    """
    Create a custom data extractor for fixed-width report lines.

    breakage_list defines the split points (character positions) used to cut
    a line into slots.

    Example
        Given breakage_list [10, 20, 30]
        using slot_num 0 in the resulting extract_line_slot will yield
        characters 0 - 10 from the string.
        Slot 1 would give characters 10 - 20, and the last slot (3) gives
        everything from character 30 to the end of the line.
    """
    def extract_line_slot(slot_num : int, line_string: str, debug : bool = False):
        """
        Pull the data for one slot out of a line/string using the break
        points defined by the parent function.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slot text is stripped and its commas are removed; if what remains
        parses as a number it is returned as a float, otherwise the cleaned
        string is returned.

        Raises AssertionError when slot_num is past the last slot.
        """
        # Explicit raise (same exception type the old `assert` produced) so the
        # range check is not stripped when Python runs with -O.
        if not slot_num < len(breakage_list) + 1:
            raise AssertionError(
                f"slot_num {slot_num} is out of range for {len(breakage_list) + 1} slots"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not numeric: keep the cleaned-up text as is
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
||||||
|
|
||||||
|
|
||||||
|
###################################################################################################################### |
||||||
|
# # |
||||||
|
# EXTRACTION FUNCTIONS: used to pull data out of specific InfoLease report types # |
||||||
|
# # |
||||||
|
###################################################################################################################### |
||||||
|
""" |
||||||
|
COMMON EXTRACTION COMPONENTS/FEATURES: |
||||||
|
- lines = report.splitlines() : splits the reports into a list of lines (based on \n line breaks in document) |
||||||
|
|
||||||
|
- extracted_data_dict : this is a dictionary that will hold the extracted data and will be used to create the dataframe |
||||||
|
|
||||||
|
- columns = list(extracted_data_dict.keys()) : breaks the extracted_data_dict into a list of its keys (excel column heads) |
||||||
|
|
||||||
|
- data_extractor = create_line_divider([#,#,#,#,#]): This creates a function we can use to pull data from a line based on |
||||||
|
its 'slot position'. A slot position is the characters between the numbers specified in the list passed into the function |
||||||
|
|
||||||
|
- for line in enumerate(lines): iterates through each line in the document. Line is a tuple of (line number, line string) |
||||||
|
having the line number can be very useful when we need to access data in adjacent lines |
||||||
|
|
||||||
|
- line# = list(zip(columns[#:#],[i for i in range(#,#)])): This creates a list with the tuple (column name, slot number). |
||||||
|
It allows us to iterate through this list and make sure the correct data slots are being used for each column/key in the |
||||||
|
data dictionary |
||||||
|
|
||||||
|
COMMON REGEX COMPONENTS |
||||||
|
\d : any digit [0-9] |
||||||
|
\D : any character that is not a digit |
||||||
|
\s : whitespace |
||||||
|
. : any character besides newline (\n) |
||||||
|
{#}: # number of the preceding character |
||||||
|
* : 0 or more repetitions of the preceding character |
||||||
|
""" |
||||||
|
|
||||||
|
|
||||||
|
def ach(report: str):
    """
    Extract the ACH report into a dataframe.

    Every line that contains both a contract number and a run of nine digits
    (the bank number pattern) is cut into six slots, one per output column.
    """
    extracted_data_dict = {
        "ContractNumber" : [],
        "CustomerName" : [],
        "BankCode" : [],
        "BankNumber": [],
        "AccountNumber" : [],
        "Payment" : [],
    }
    columns = list(extracted_data_dict)
    data_extractor = create_line_divider([19,57,67,82,104])
    bank_number_regex = r"\d{9}"
    for text in report.splitlines():
        # Both patterns must be present for the line to be a data row
        if re.search(contract_number_regex, text) and re.search(bank_number_regex, text):
            # Column position doubles as the slot number
            for slot, column in enumerate(columns):
                extracted_data_dict[column].append(data_extractor(slot, text))
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
|
||||||
|
def disposition(report: str):
    """
    Extract the Disposition report into a dataframe.

    A data row is a line whose first slot holds a contract number. The first
    ten columns come from that line; the customer name is the stripped text
    of the line that follows it (blank when the data row is the last line).
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber" : [],
        "Amount Rec" : [],
        "Trans Num" : [],
        "Date RCVD": [],
        "Date Posted" : [],
        "Last Pymt Due" : [],
        "Date Due" : [],
        "Residual Amt" : [],
        "Term Date" : [],
        "Total Pastdue" : [],
        "Customer Name" : [],
    }
    columns = list(extracted_data_dict)
    data_extractor = create_line_divider([15,32,41, 51, 61, 79,88, 103, 114])
    for idx, text in enumerate(lines):
        first_slot = data_extractor(0, text)
        # The extractor turns purely numeric slots into floats; those can never
        # be contract numbers and would make re.search raise a TypeError.
        if not isinstance(first_slot, str) or not re.search(contract_number_regex, first_slot):
            continue
        # Every column except the last one is read from the data line itself
        for slot, column in enumerate(columns[:-1]):
            extracted_data_dict[column].append(data_extractor(slot, text))
        # Guard against a data row on the very last line of the report
        name_line = lines[idx + 1] if idx + 1 < len(lines) else ""
        extracted_data_dict["Customer Name"].append(name_line.strip())
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
|
||||||
|
def gainloss(report: str):
    """
    Extract the Gain/Loss report into a dataframe.

    Each record spans six report lines: the line before the one holding the
    contract number, the contract line itself, and the four lines after it.
    A contract line is recognised by a contract number in slot 0 together
    with a numeric value in slot 1. After extraction the disposition code is
    split into 'DISPOSITION CODE' (text before the first space) and
    'DISPOSITION DESC' (the remainder).
    """
    lines = report.splitlines()
    extracted_data_dict = {
        'REM RENT RCVB' : [],
        'GUAR RESIDUAL' : [],
        'ASSET VAL' : [],
        'EQUITY ADDON' : [],
        'CURR INT RCVB' : [],
        'MISC G/L' : [],
        'BLENDED INC' : [],
        'CONTRACT NUMBER' : [],
        'CURR RENT RCVB' : [],
        'RESIDUAL' : [],
        'END/SEC DEP' : [],
        'SALES TAX' : [],
        'INVENT CHANGE' : [],
        'NET RESERVE' : [],
        'LATE CHGS' : [],
        'CUSTOMER NAME' : [],
        'UNEARNED FIN' : [],
        'UNAMORT RES' : [],
        'MISC' : [],
        'MISC TAX' : [],
        'CASH RECEIVED' : [],
        'RCV OFFSET' : [],
        'GAIN/LOSS' : [],
        'DISPOSITION CODE' : [],
        # 'DISPOSITION DESC' is not read from a slot; it is derived from
        # 'DISPOSITION CODE' below. It used to sit here without ': [],', which
        # made Python join it with the next key into the bogus column name
        # 'DISPOSITION DESCUNEARNED IDC'.
        'UNEARNED IDC' : [],
        'UNPAID INT' : [],
        'PENALTY FEE' : [],
        'UNPAID ACCRD' : [],
        'RENEWAL RCVBL' : [],
        'DEF REN INC' : [],
        'DEF REN INT' : [],
        'EARNED IDC' : [],
        'GST BOOK G/L' : [],
        'UNRECOG GST' : [],
        'INT EARNED' : [],
        'OVER/SHORT' : [],
        'OPER RCVB' : [],
        'OPER BASIS' : [],
        'CTD OPER DEPR' : [],
    }
    columns = list(extracted_data_dict)
    # One entry per line of a record: (column names, slot numbers) paired up so
    # each dictionary key is filled from its corresponding data slot.
    # Last column index per record line: 6 (BLENDED INC), 14 (LATE CHGS),
    # 22 (GAIN/LOSS), 30 (DEF REN INT), 35 (OVER/SHORT), then the rest.
    record_layout = [
        (columns[0:7], [1, 2, 3, 4, 5, 6, 7]),
        (columns[7:15], [0, 1, 2, 3, 4, 5, 6, 7]),
        (columns[15:23], [0, 1, 2, 3, 4, 5, 6, 7]),
        (columns[23:31], [0, 1, 2, 3, 4, 5, 6, 7]),
        (columns[31:36], [1, 2, 4, 5, 7]),
        (columns[36:], [1, 2, 3]),
    ]
    data_extractor = create_line_divider([27,43,58,74,88,105,120])
    for idx, text in enumerate(lines):
        first_slot = data_extractor(0, text)
        # A float in slot 0 cannot be a contract number (and would make
        # re.search raise), so require a string before matching.
        if not isinstance(first_slot, str) or not re.search(contract_number_regex, first_slot):
            continue
        if not isinstance(data_extractor(1, text), float):
            continue
        # Lines idx-1 .. idx+4; positions outside the report are read as blank
        # lines so a record at the very start/end of the file cannot raise.
        data_section = [
            lines[pos] if 0 <= pos < len(lines) else ""
            for pos in range(idx - 1, idx + 5)
        ]
        for section_line, (names, slots) in zip(data_section, record_layout):
            for name, slot in zip(names, slots):
                extracted_data_dict[name].append(data_extractor(slot, section_line))

    df = pd.DataFrame(extracted_data_dict)
    # The Accounting team wanted the disposition code split into number and description
    disp_code = []
    disp_description = []
    for d in df['DISPOSITION CODE'].to_list():
        if isinstance(d, str):
            code, _, description = d.partition(" ")
        else:
            # A purely numeric slot was converted to a float: no description
            code, description = d, ""
        disp_code.append(code)
        disp_description.append(description)
    df["DISPOSITION CODE"] = disp_code
    # Place the description right after the code it belongs to
    df.insert(df.columns.get_loc("DISPOSITION CODE") + 1, "DISPOSITION DESC", disp_description)
    return df
||||||
|
|
||||||
|
# Works for Net-inv-loans & NIV-after |
||||||
|
def net_invest_trial_balance(report: str):
    """
    Extract a net investment trial balance report into a dataframe.

    Each record spans four report lines: the line before the one holding the
    contract (lease) number, that line itself, and the two lines after it.
    A record is detected by a contract number in slot 0.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        'CUSTOMER NAME' : [],
        'CURR INT RCVB' : [],
        'UNEARNED BLENDED' : [],
        'BLEND NET INV' : [],
        'LEASE NUMBER' : [],
        'GROSS CONTRACT' : [],
        'CURR RENT RCVB' : [],
        'UNEARN FIN' : [],
        'END DEPOSIT' : [],
        'SEC DEPOSIT' : [],
        'LEASE PYMTS' : [],
        'TOTAL' : [],
        'CONTRACT STAT' : [],
        'PAYMENTS RCVD' : [],
        'REM RENT RCVB' : [],
        'UNEARN RESID' : [],
        'PROV LOSS' : [],
        'NET RESERVE' : [],
        'UNEARN INC' : [],
        'BAL REMAINING' : [],
        'RESIDUAL' : [],
        'UNPAID INT' : [],
        'NET INV' : [],
        'UNEARNED IDC' : [],
    }
    columns = list(extracted_data_dict)
    # One entry per line of a record: (column names, slot numbers)
    record_layout = [
        (columns[0:4], [0, 3, 4, 5]),
        (columns[4:12], [0, 1, 2, 3, 4, 5, 6, 7]),
        (columns[12:19], [0, 1, 2, 3, 4, 5, 6]),
        (columns[19:], [1, 2, 3, 4, 5]),
    ]

    data_extractor = create_line_divider([18,35,53,67,87,106,117])
    for idx, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        # Numeric slots come back as floats and cannot hold a contract number
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue
        # Lines idx-1 .. idx+2; positions outside the report are read as blank
        # lines so a record at the very start/end of the file cannot raise.
        data_section = [
            lines[pos] if 0 <= pos < len(lines) else ""
            for pos in range(idx - 1, idx + 3)
        ]
        for section_line, (names, slots) in zip(data_section, record_layout):
            for name, slot in zip(names, slots):
                extracted_data_dict[name].append(data_extractor(slot, section_line))
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
|
||||||
|
def lockbox(report: str):
    """
    Extract the Lock Box report into a dataframe.

    Two kinds of data lines are recognised:
      - a "full" line carrying both the bank payment fields and the contract fields
      - a "contract only" line, which reuses the bank payment fields of the
        previous entry
    The customer name is taken from the first following line that matches the
    customer-name pattern (blank if the report ends before one is found).
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "CustomerName" : [],
        "PaymentDate" : [],
        "InvoiceNumber" : [],
        "CheckNumber" : [],
        "InvoicePayment" : [],
        "ContractNumber" : [],
        "ContractPayment" : [],
    }
    columns = list(extracted_data_dict)
    # (column, slot) pairs telling which data slot feeds which dictionary key
    bank_payment_fields = list(zip(columns[1:5], [1, 2, 3, 4]))
    infolease_payment_fields = list(zip(columns[5:], [7, 8]))

    # Below are the Regular Expressions used to find relevant data lines
    full_line = r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1"
    contract_only_line = r"\s{90}\d.{7}1\d{2}-"
    cust_name_line = r"\s{98}.{28}\D*"
    # The data extractor allows us to extract data from the report using slots
    # Slots are ranges of characters denoted by the list fed into the creation function
    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
    for idx, text in enumerate(lines):
        # We can skip empty lines
        if not text:
            continue
        if re.search(full_line, text):
            # Full line: read the bank payment data straight from the line
            for column, slot in bank_payment_fields:
                extracted_data_dict[column].append(data_extractor(slot, text))
        elif re.search(contract_only_line, text):
            # Contract-only line: the bank payment data of the previous entry
            # applies to this contract too (blank if there is no previous entry)
            for column, _ in bank_payment_fields:
                previous = extracted_data_dict[column]
                previous.append(previous[-1] if previous else "")
        else:
            # Neither pattern matched, so the line holds no relevant data
            continue
        for column, slot in infolease_payment_fields:
            extracted_data_dict[column].append(data_extractor(slot, text))
        # Keep moving down a line at a time until a customer name shows up.
        # It is typically 1 line under the data but can be further down when
        # cut off by a page end; the search stops at the end of the report.
        customer_name = ""
        for pos in range(idx + 1, len(lines)):
            if re.search(cust_name_line, lines[pos]):
                customer_name = data_extractor(7, lines[pos])
                break
        extracted_data_dict["CustomerName"].append(customer_name)
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
|
||||||
|
def minv(report: str):
    """
    Extract the Minv report into a dataframe.

    Every line that contains a contract number is cut into nine slots, one
    per output column.
    """
    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
    extracted_data_dict = {
        "ContractNumber" : [],
        "UTAB_OIC_DUE" : [],
        "RentalDue" : [],
        "UTAB_OIC_PYMT" : [],
        "ChargeType" : [],
        "OutstandBalance" : [],
        "BizSegment" : [],
        "BookingDate" : [],
        "Branch" : [],
    }
    columns = list(extracted_data_dict)
    for text in report.splitlines():
        if re.search(contract_number_regex, text) is None:
            continue
        # Column position doubles as the slot number
        for slot, column in enumerate(columns):
            extracted_data_dict[column].append(data_extractor(slot, text, debug=False))
    # All the list lengths need to be the same so if anything was missed it will fail to build
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
# Good for PUB_WIRES, VMCC, PBP_EPAY, returned check |
||||||
|
def payment_transactions(report: str):
    """
    Extract a payment transactions report into a dataframe.

    A data row is a line whose slot 1 holds a contract number. The first nine
    columns come from that line; the customer name, transaction number and
    invoice number come from the line after it (blank when the data row is
    the last line of the report).
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([6,33,52,62,80,89,110,121])
    extracted_data_dict = {
        'SEQ' : [],
        'ACCOUNT NUMBER' : [],
        'PYMT METHOD' : [],
        'DATE RCVD' : [],
        'AMOUNT' : [],
        'REF NO': [],
        'PAYMENT MEMO' : [],
        'PYMT TYPE' : [],
        'CHECK NO' : [],
        'CUSTOMER NAME' : [],
        'TRANSACTIONS NUM': [],
        'INV NO' : [],
    }
    columns = list(extracted_data_dict)
    transaction_num_regex = r"\d{8}"
    for idx, text in enumerate(lines):
        slot1 = data_extractor(1, text, False)
        # Numeric slots come back as floats and cannot hold a contract number
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue
        # The last three columns are filled from the following line instead
        for slot, column in enumerate(columns[:-3]):
            extracted_data_dict[column].append(data_extractor(slot, text))
        # Guard against a data row on the very last line of the report
        detail_line = lines[idx + 1] if idx + 1 < len(lines) else ""
        tnum_match = re.search(transaction_num_regex, detail_line)
        extracted_data_dict["TRANSACTIONS NUM"].append(tnum_match.group() if tnum_match else "")
        extracted_data_dict['CUSTOMER NAME'].append(detail_line[6:37].strip())
        extracted_data_dict['INV NO'].append(detail_line[79:90].strip())
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
|
||||||
|
def renewal_net_invest_trial_balance(report: str):
    """
    Extract the renewal net investment trial balance report into a dataframe.

    Each record spans three report lines: the line before the one holding the
    contract number, that line itself, and the line after it. A record is
    detected by a contract number in slot 0.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([21,29,43,58,71,88,99,113])
    extracted_data_dict = {
        'CUSTOMER NAME' : [],
        'TYPE' : [],
        'GROSS RENEWAL' : [],
        'CUR RENT RCVB' : [],
        'UNEARNED RIN' : [],
        'REMAINING RES' : [],
        'LEASE PYMTS' : [],
        'CONTRACT NUMBER' : [],
        'RENEWAL' : [],
        'PAYMENTS RCVD' : [],
        'REM RENT RCVB' : [],
        'UNPAID RES' : [],
        'SECURITY DEP' : [],
        'NET INVEST' : [],
        'UNEARN INCOME' : [],
        'TOTAL' : [],
        'REMAINING BAL' : [],
        'FINANCED RES' : [],
    }
    columns = list(extracted_data_dict)
    # One entry per line of a record: (column names, slot numbers)
    record_layout = [
        (columns[0:7], [0, 1, 2, 3, 4, 5, 7]),
        (columns[7:16], [0, 1, 2, 3, 4, 5, 6, 7, 8]),
        (columns[16:], [3, 4]),
    ]

    for idx, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        # Numeric slots come back as floats and cannot hold a contract number
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue
        # Lines idx-1 .. idx+1; positions outside the report are read as blank
        # lines so a record at the very start/end of the file cannot raise.
        data_section = [
            lines[pos] if 0 <= pos < len(lines) else ""
            for pos in range(idx - 1, idx + 2)
        ]
        for section_line, (names, slots) in zip(data_section, record_layout):
            for name, slot in zip(names, slots):
                extracted_data_dict[name].append(data_extractor(slot, section_line))
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
|
|
||||||
|
def unapplied(report: str):
    """
    Extract the Unapplied report into a dataframe.

    A data row is a line with a run of seven digits in slot 0 and a
    dd/dd/dddd date in slot 3. The first nine columns come from that line;
    the remaining eight come from slots 1-8 of the line after it (blank when
    the data row is the last line of the report).
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "Trans Num" : [],
        "ContractNumber" : [],
        "CheckNum" : [],
        "Date RCVD" : [],
        "Asset ID": [],
        "Reversed Amt" : [],
        "Branch" : [],
        "Unapplied Susp Acct" : [],
        "PaymentMemo" : [],
        "Payers Name" : [],
        "Batch Num" : [],
        "Posting Date" : [],
        "Unapplied Amt" : [],
        "Rev Post Date" : [],
        "Ref Num" : [],
        "Check Amt" : [],
        "Reason Code" : [],
    }
    columns = list(extracted_data_dict)
    data_extractor = create_line_divider([9,25, 38, 50, 65, 80, 89, 108])
    trans_num = r"\d{7}"
    date_rcvd = r"\d{2}/\d{2}/\d{4}"
    # Iterate with enumerate so the line following a data row can be reached
    for idx, text in enumerate(lines):
        # str() because the extractor returns numeric slots as floats
        if re.search(trans_num, str(data_extractor(0, text, debug=False))) is None:
            continue
        if re.search(date_rcvd, str(data_extractor(3, text, debug=False))) is None:
            continue
        # First nine columns: slots 0-8 of the data row
        for slot in range(0, 9):
            extracted_data_dict[columns[slot]].append(data_extractor(slot, text))
        # Guard against a data row on the very last line of the report
        second_line = lines[idx + 1] if idx + 1 < len(lines) else ""
        # Remaining columns: slots 1-8 of the following line
        for slot in range(1, len(columns) - 8):
            extracted_data_dict[columns[8 + slot]].append(data_extractor(slot, second_line))
    return pd.DataFrame(extracted_data_dict)
||||||
|
|
||||||
@ -0,0 +1,123 @@ |
|||||||
|
from mainWindow_new import Ui_MainWindow |
||||||
|
import sys |
||||||
|
import os |
||||||
|
import pandas as pd |
||||||
|
from PyQt5 import QtWidgets |
||||||
|
from datetime import datetime as dt |
||||||
|
import ILExtract as ilx |
||||||
|
|
||||||
|
|
||||||
|
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
    """
    Main application window: lets the user pick an input report and an output
    file, runs the extraction function matching the selected report type and
    previews the result.
    """

    def __init__(self, *args, obj=None, **kwargs):
        super(MainWindow, self).__init__(*args, **kwargs)
        self.setupUi(self)

        self.inputFile = ""
        self.outputFile = ""
        self.rtp = False # Ready to Process
        self.ofa = False # Output file ready

        # Actions
        self.inputFileButton.clicked.connect(self.getfile)
        self.outputFileButton.clicked.connect(self.setOutput)
        self.processReportButton.clicked.connect(self.process_selection)
        self.openReportButton.clicked.connect(self.to_clipboard)

    def getfile(self):
        """
        Ask for the input report, preview its text and derive a default
        output file name next to it. Returns '' when the dialog is cancelled.
        """
        inFile = QtWidgets.QFileDialog.getOpenFileName(self, 'Open file')
        # Cancelled dialog: leave the current selection (and its line edit) alone
        if inFile[0] == '':
            return ''
        print(f"Input File: {inFile}")
        try:
            with open(inFile[0], errors="replace") as inF:
                txt = inF.read()
        except OSError as ose:
            warning = QtWidgets.QMessageBox()
            warning.setWindowTitle("Warning: Unable To Open File")
            warning.setText(f"Failed to open file: {inFile[0]}\n{ose}")
            warning.exec()
            return ''
        self.inputFileLE.setText(inFile[0])
        self.inputFilePreview.setText(txt)
        self.inputFile = inFile[0]
        # Everything up to and including the last '/' is the output folder
        head, sep, _ = self.inputFile.rpartition('/')
        outputRoot = head + sep
        reportType = self.reportTypeCB.currentText()
        self.outputFile = f"{outputRoot}{reportType}_{dt.now().strftime('%Y%m%d_%H%M')}.csv"
        self.outputFileLE.setText(self.outputFile)
        # The last word of the report type is expected somewhere in the file path
        typeHint = reportType.split(" ")[-1].lower()
        if typeHint not in self.inputFile.lower():
            print("Possibly wrong file type")
            warning = QtWidgets.QMessageBox()
            warning.setWindowTitle("Warning: File Type Mis-Match")
            warning.setText(f"Selected report type is {reportType} but input file did not contain '{typeHint}'!\n\
            Make sure you select the correct report type before processing!")
            warning.exec()
        self.check_ready_to_process()

    def setOutput(self):
        """
        Ask for the output file name; a timestamp and the .csv extension are
        appended to it. Returns '' when the dialog is cancelled.
        """
        outFile = QtWidgets.QFileDialog.getSaveFileName(self, "Output file name")
        if outFile[0] == '':
            return ''
        # Build the name once so the line edit and self.outputFile always agree.
        # The file is written with to_csv, hence the .csv extension.
        outputName = f"{outFile[0]}__{dt.now().strftime('%Y%m%d_%H_%M')}.csv"
        self.outputFileLE.setText(outputName)
        print(f"Output: {outFile}")
        self.outputFile = outputName
        self.check_ready_to_process()

    def check_ready_to_process(self):
        """Enable the process button once both input and output files are set."""
        self.rtp = (self.inputFile != "") and (self.outputFile != "")
        if self.rtp:
            self.processReportButton.setEnabled(True)

    def process_selection(self):
        """
        Run the extraction function for the selected report type on the input
        file, save the result as CSV to the output file and preview the first
        500 rows. Any failure is reported in a message box.
        """
        extract_functions = {
            "ACH": ilx.ach,
            "Disposition": ilx.disposition,
            "Gain Loss": ilx.gainloss,
            "Lock Box": ilx.lockbox,
            "Minv_C": ilx.minv,
            "Net Inv. Loans": ilx.net_invest_trial_balance,
            "NI Renewal": ilx.renewal_net_invest_trial_balance,
            "NIV After": ilx.net_invest_trial_balance,
            "PBP Epay": ilx.payment_transactions,
            "Unapplied": ilx.unapplied,
            "VMCC": ilx.payment_transactions,
            "Wires": ilx.payment_transactions,
            "Returns": ilx.payment_transactions,
        }
        try:
            # KeyError for an unknown report type is handled below
            extract_function = extract_functions[self.reportTypeCB.currentText()]
            dataframe = ilx.ILReport(
                location= self.inputFile,
                extraction_function=extract_function,
                output_location=self.outputFile,
            ).process()
            # ILReport.process() returns 1 instead of a dataframe on failure
            if not isinstance(dataframe, pd.DataFrame):
                raise ValueError(f"extraction returned {dataframe!r}")
            dataframe.to_csv(self.outputFile, index=False)
            smallDF = dataframe.iloc[0:500,:]
            self.inputFilePreview.setText(smallDF.to_html(index=False))
            self.openReportButton.setEnabled(True)
        except Exception as err:
            print(f"Unable to process {self.inputFile}: {err}")
            error = QtWidgets.QMessageBox()
            error.setWindowTitle('Error Processing File!')
            error.setText(f"Unable to process {self.inputFile}!\nPlease check input file!")
            # Without exec() the message box is never shown to the user
            error.exec()

    def preview_report(self):
        """Show the saved output file in the preview pane."""
        # The output file is written with to_csv, so it must be read back as CSV
        df = pd.read_csv(self.outputFile)
        self.inputFilePreview.setText(df.to_html())

    def to_clipboard(self):
        """Copy the saved output file to the clipboard in Excel-friendly form."""
        df = pd.read_csv(self.outputFile)
        df.to_clipboard(excel=True)
||||||
|
|
||||||
|
|
||||||
|
# Only start the GUI when this file is run as a script, so importing the
# module (e.g. for testing MainWindow) does not open a window.
if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    app.setStyle("Fusion")

    window = MainWindow()
    window.setWindowTitle("IL Extract")
    window.show()
    # Hand the event loop's exit code back to the operating system
    sys.exit(app.exec())
||||||
@ -0,0 +1,140 @@ |
|||||||
|
# -*- coding: utf-8 -*- |
||||||
|
|
||||||
|
# Form implementation generated from reading ui file 'MonarchReplace2.ui' |
||||||
|
# |
||||||
|
# Created by: PyQt5 UI code generator 5.15.6 |
||||||
|
# |
||||||
|
# WARNING: Any manual changes made to this file will be lost when pyuic5 is |
||||||
|
# run again. Do not edit this file unless you know what you are doing. |
||||||
|
|
||||||
|
|
||||||
|
from PyQt5 import QtCore, QtGui, QtWidgets |
||||||
|
|
||||||
|
|
||||||
|
class Ui_MainWindow(object):
    """Widget layout for the IL Extract main window.

    ``setupUi`` creates every widget and stores it as an attribute on this
    object; ``retranslateUi`` assigns the user-visible texts.
    """

    # Labels shown in the report-type combo box, in display order.  Both the
    # number of combo-box rows and their texts are derived from this tuple so
    # the two can never disagree: previously 12 rows were created but 13
    # labels were assigned, so the last label ("Wires") was silently dropped.
    # NOTE(review): the report-processing code picks an extractor by comparing
    # reportTypeCB.currentText() with these labels - keep them in sync.
    REPORT_TYPES = (
        "ACH",
        "Disposition",
        "Gain Loss",
        "Lock Box",
        "Minv_C",
        "Net Inv. Loans",
        "NI Renewal",
        "NIV After",
        "PBP Epay",
        "Returned Check",
        "Unapplied",
        "VMCC",
        "Wires",
    )

    def setupUi(self, MainWindow):
        """Create and arrange all widgets on ``MainWindow``.

        Args:
            MainWindow: The QMainWindow instance to populate.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1001, 664)
        MainWindow.setWindowTitle('IL Extract')

        # Icons are loaded from paths relative to the working directory.
        icon = QtGui.QIcon()
        icon.addPixmap(QtGui.QPixmap("extract.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        icon2 = QtGui.QIcon()
        icon2.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        iconCopy = QtGui.QIcon()
        iconCopy.addPixmap(QtGui.QPixmap("folder.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        iconProcess = QtGui.QIcon()
        iconProcess.addPixmap(QtGui.QPixmap("process.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        MainWindow.setWindowIcon(icon)

        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")

        # Preview pane for the processed report.
        self.inputFilePreview = QtWidgets.QTextBrowser(self.centralwidget)
        self.inputFilePreview.setGeometry(QtCore.QRect(20, 220, 951, 391))
        self.inputFilePreview.setObjectName("inputFilePreview")

        # Action buttons; both start disabled.
        self.processReportButton = QtWidgets.QPushButton(self.centralwidget)
        self.processReportButton.setEnabled(False)
        self.processReportButton.setGeometry(QtCore.QRect(20, 180, 250, 36))
        self.processReportButton.setObjectName("processReportButton")
        self.processReportButton.setIcon(iconProcess)
        self.openReportButton = QtWidgets.QPushButton(self.centralwidget)
        self.openReportButton.setEnabled(False)
        self.openReportButton.setGeometry(QtCore.QRect(280, 180, 241, 36))
        self.openReportButton.setObjectName("openReportButton")
        self.openReportButton.setIcon(iconCopy)

        # Container holding the input-file and output-location rows.
        self.layoutWidget = QtWidgets.QWidget(self.centralwidget)
        self.layoutWidget.setGeometry(QtCore.QRect(21, 90, 951, 84))
        self.layoutWidget.setObjectName("layoutWidget")
        self.fileSettingsBox = QtWidgets.QVBoxLayout(self.layoutWidget)
        self.fileSettingsBox.setContentsMargins(0, 0, 0, 0)
        self.fileSettingsBox.setObjectName("fileSettingsBox")

        # Input file row: select button plus read-only path display.
        self.inputFileBox = QtWidgets.QHBoxLayout()
        self.inputFileBox.setObjectName("inputFileBox")
        self.inputFileButton = QtWidgets.QPushButton(self.layoutWidget)
        self.inputFileButton.setMinimumSize(QtCore.QSize(250, 0))
        self.inputFileButton.setMaximumSize(QtCore.QSize(250, 36))
        self.inputFileButton.setIcon(icon2)
        self.inputFileButton.setObjectName("inputFileButton")
        self.inputFileBox.addWidget(self.inputFileButton)
        self.inputFileLE = QtWidgets.QLineEdit(self.layoutWidget)
        self.inputFileLE.setReadOnly(True)
        self.inputFileLE.setObjectName("inputFileLE")
        self.inputFileBox.addWidget(self.inputFileLE)
        self.fileSettingsBox.addLayout(self.inputFileBox)

        # Output location row: select button plus read-only path display.
        self.outFileLocation = QtWidgets.QHBoxLayout()
        self.outFileLocation.setObjectName("outFileLocation")
        self.outputFileButton = QtWidgets.QPushButton(self.layoutWidget)
        self.outputFileButton.setMinimumSize(QtCore.QSize(250, 0))
        self.outputFileButton.setMaximumSize(QtCore.QSize(250, 36))
        self.outputFileButton.setIcon(icon2)
        self.outputFileButton.setObjectName("outputFileButton")
        self.outFileLocation.addWidget(self.outputFileButton)
        self.outputFileLE = QtWidgets.QLineEdit(self.layoutWidget)
        self.outputFileLE.setReadOnly(True)
        self.outputFileLE.setObjectName("outputFileLE")
        self.outFileLocation.addWidget(self.outputFileLE)
        self.fileSettingsBox.addLayout(self.outFileLocation)

        # Report-type selector.  One empty row is created per label; the
        # texts themselves are assigned in retranslateUi.
        self.reportTypeCB = QtWidgets.QComboBox(self.centralwidget)
        self.reportTypeCB.setGeometry(QtCore.QRect(21, 51, 250, 37))
        self.reportTypeCB.setObjectName("reportTypeCB")
        for _ in self.REPORT_TYPES:
            self.reportTypeCB.addItem("")

        # Bold 14pt heading label above the selector.
        self.reportTypeL = QtWidgets.QLabel(self.centralwidget)
        self.reportTypeL.setGeometry(QtCore.QRect(21, 21, 144, 24))
        font = QtGui.QFont()
        font.setPointSize(14)
        font.setBold(True)
        font.setWeight(75)
        self.reportTypeL.setFont(font)
        self.reportTypeL.setObjectName("reportTypeL")

        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 1001, 29))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.reportTypeL.setBuddy(self.reportTypeCB)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

        # Keyboard tab order.
        MainWindow.setTabOrder(self.reportTypeCB, self.inputFileButton)
        MainWindow.setTabOrder(self.inputFileButton, self.outputFileButton)
        MainWindow.setTabOrder(self.outputFileButton, self.processReportButton)
        MainWindow.setTabOrder(self.processReportButton, self.openReportButton)
        MainWindow.setTabOrder(self.openReportButton, self.inputFileLE)
        MainWindow.setTabOrder(self.inputFileLE, self.outputFileLE)
        MainWindow.setTabOrder(self.outputFileLE, self.inputFilePreview)

    def retranslateUi(self, MainWindow):
        """Assign the user-visible texts of all widgets.

        Args:
            MainWindow: The QMainWindow whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        # setupUi calls this method last, so the title set here is the one
        # that sticks; it must match the 'IL Extract' title chosen there
        # instead of resetting it to the generator default "MainWindow".
        MainWindow.setWindowTitle(_translate("MainWindow", "IL Extract"))
        self.processReportButton.setText(_translate("MainWindow", "&Process Report"))
        self.openReportButton.setText(_translate("MainWindow", "&Copy to Clipboard"))
        self.inputFileButton.setText(_translate("MainWindow", "Select &InfoLease Report"))
        self.inputFileLE.setPlaceholderText(_translate("MainWindow", "No file selected"))
        self.outputFileButton.setText(_translate("MainWindow", "Select &Report Output Location"))
        self.outputFileLE.setPlaceholderText(_translate("MainWindow", "No location selected"))
        # Labels are translated at runtime; they are not string literals at
        # the call site, so translation-extraction tools will not see them.
        for index, label in enumerate(self.REPORT_TYPES):
            self.reportTypeCB.setItemText(index, _translate("MainWindow", label))
        self.reportTypeL.setText(_translate("MainWindow", "Infolease Report"))
||||||
Loading…
Reference in new issue