import os
import pandas as pd
from datetime import datetime as dt, timedelta
import sys, getopt
import re
from pathlib import Path
import time

# Matches contract numbers of the form ###-#######-###
contract_number_regex = r"\d{3}-\d{7}-\d{3}"

class ILReport:
    """
    The InfoLease Report class is used to work with the report files.
    It makes it easier to add new reports to the workflow, makes it clearer where
    the reports are coming from, and helps with tracking reports that may not be ready yet.
    """
    def __init__(self, location, extraction_function=None, output_location=None, output_name=None):
        # The location where the InfoLease report is stored
        self.location = location
        # The base name of the file, corresponds to the report type
        # If the output location is not specified, save to the input location
        if output_location is None:
            self.output_location = Path(location).parent.absolute()
        else:
            self.output_location = output_location
        # This is optional but has a default
        if output_name is None:
            # Get the file name of the input and remove the date
            self.output_name = os.path.basename(f"{self.location}")\
                .replace(f"{(dt.now() - timedelta(days=+1)).strftime('%Y.%m.%d')}", "")
        else:
            self.output_name = output_name
        # The function used to extract the data from the report
        self.x_method = extraction_function
        # Tracks whether the data was successfully extracted
        self.successful = False

    def run(self) -> int:
        """
        This method is what actually runs the report. It uses the specified extraction
        function to create and save an Excel document.
        SUCCESS returns 0
        ERROR returns 1
        Failure is also noted by self.successful == False
        """
        try:
            # Open the file and read it to a string | errors='replace' deals with non UTF-8 characters (no effect on output)
            with open(self.location, errors="replace") as ifile:
                report = ifile.read()
        except IOError as ioe:
            print(f"Failed to open file: {self.location}\n{ioe}")
            self.successful = False
            return 1
        try:
            # Run the associated method to extract the data and get the dataframe
            dataframe = self.x_method(report, self.output_location)
            try:
                assert len(dataframe) > 1
            except Exception as e:
                print(f"Data Length Error: {self.output_name} is empty:\n{dataframe}")
                self.successful = False
                return 1
        except Exception as e:
            print(f"{self.output_name} failed to process:\n{e}")
            self.successful = False
            return 1
        # try:
        #     # Save the dataframe as an excel document
        #     dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False)
        # except Exception as e:
        #     self.successful = False
        #     print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}")
        #     return 1
        # self.successful = True
        return 0

    def process(self):
        try:
            # Open the file and read it to a string | errors='replace' deals with non UTF-8 characters (no effect on output)
            with open(self.location, errors="replace") as ifile:
                report = ifile.read()
        except IOError as ioe:
            print(f"Failed to open file: {self.location}\n{ioe}")
            self.successful = False
            return 1
        try:
            # Run the associated method to extract the data and get the dataframe
            dataframe = self.x_method(report, self.output_name)
            try:
                assert len(dataframe) > 1
            except Exception as e:
                print(f"Data Length Error: {self.output_name} is empty:\n{dataframe}")
                self.successful = False
                return 1
        except Exception as e:
            print(f"{self.output_name} failed to process:\n{e}")
            self.successful = False
            return 1
        return dataframe
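
# Illustrative usage of ILReport (a minimal sketch; the file path, the report/extraction
# pairing, and the output name below are assumptions for the sake of the example, not
# part of the workflow):
#
#   ach_report = ILReport(
#       location="/path/to/reports/ACH.2024.01.01.txt",
#       extraction_function=ach,
#       output_name="ach_extract.xlsx",
#   )
#   data = ach_report.process()   # returns the extracted DataFrame, or 1 on failure
#   status = ach_report.run()     # returns 0 on success, 1 on error
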
def create_line_divider(breakage_list: list):
    """
    Allows for the creation of a custom data extractor.
    breakage_list defines the split points that will be used for the line.

    Example
    Given breakage_list [10, 20, 30],
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'
        Will automatically convert numbers to floats.
        """
        assert slot_num < len(breakage_list) + 1
        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot
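
# Illustrative example of the slot behaviour (a minimal sketch; the sample line and
# break points are made up, not taken from a real report):
#
#   extract = create_line_divider([10, 20])
#   sample = "ABC-123   1,234.56  ACME CORP"
#   extract(0, sample)   # -> "ABC-123"   (characters 0-10, stripped)
#   extract(1, sample)   # -> 1234.56     (characters 10-20, comma removed, cast to float)
#   extract(2, sample)   # -> "ACME CORP" (characters 20 to end of line)
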
######################################################################################################################
#                                                                                                                    #
#            EXTRACTION FUNCTIONS: used to pull data out of specific InfoLease report types                         #
#                                                                                                                    #
######################################################################################################################
r"""
COMMON EXTRACTION COMPONENTS/FEATURES:
- lines = report.splitlines() : splits the report into a list of lines (based on \n line breaks in the document)

- extracted_data_dict : a dictionary that holds the extracted data and is used to create the dataframe

- columns = list(extracted_data_dict.keys()) : breaks the extracted_data_dict into a list of its keys (Excel column headers)

- data_extractor = create_line_divider([#,#,#,#,#]) : creates a function we can use to pull data from a line based on
  its 'slot position'. A slot position is the range of characters between the numbers specified in the list passed
  into the function.

- for line in enumerate(lines) : iterates through each line in the document. line is a tuple of (line number, line string);
  having the line number is very useful when we need to access data in adjacent lines.

- line# = list(zip(columns[#:#], [i for i in range(#,#)])) : creates a list of (column name, slot number) tuples.
  It lets us iterate through the list and make sure the correct data slots are used for each column/key in the
  data dictionary.

COMMON REGEX COMPONENTS
\d  : any digit [0-9]
\D  : any character that is not a digit
\s  : whitespace
.   : any character besides newline (\n)
{#} : exactly # repetitions of the preceding character
*   : 0 or more repetitions of the preceding character
"""
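
# A small worked example of the (column name, slot number) pairing described above
# (illustrative only; the column names are invented):
#
#   columns = ["ContractNumber", "CustomerName", "Payment"]
#   line0 = list(zip(columns[0:3], [i for i in range(1, 4)]))
#   # -> [("ContractNumber", 1), ("CustomerName", 2), ("Payment", 3)]
#   # i.e. ContractNumber is read from slot 1, CustomerName from slot 2, Payment from slot 3
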
def ach(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber" : [],
        "CustomerName" : [],
        "BankCode" : [],
        "BankNumber": [],
        "AccountNumber" : [],
        "Payment" : [],
    }
    columns = list(extracted_data_dict.keys())
    data_extractor = create_line_divider([19,57,67,82,104])
    bank_number_regex = r"\d{9}"
    for line in enumerate(lines):
        if (re.search(contract_number_regex, line[1]) is not None) and (re.search(bank_number_regex, line[1]) is not None):
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns))]
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

def disposition(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber" : [],
        "Amount Rec" : [],
        "Trans Num" : [],
        "Date RCVD": [],
        "Date Posted" : [],
        "Last Pymt Due" : [],
        "Date Due" : [],
        "Residual Amt" : [],
        "Term Date" : [],
        "Total Pastdue" : [],
        "Customer Name" : [],
    }
    columns = list(extracted_data_dict.keys())
    data_extractor = create_line_divider([15,32,41,51,61,79,88,103,114])
    for line in enumerate(lines):
        if re.search(contract_number_regex, data_extractor(0, line[1])):
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns)-1)]
            # The customer name sits on the line directly after the data line
            extracted_data_dict["Customer Name"].append(lines[line[0]+1].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

def gainloss(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
        'REM RENT RCVB' : [],
        'GUAR RESIDUAL' : [],
        'ASSET VAL' : [],
        'EQUITY ADDON' : [],
        'CURR INT RCVB' : [],
        'MISC G/L' : [],
        'BLENDED INC' : [],
        'CONTRACT NUMBER' : [],
        'CURR RENT RCVB' : [],
        'RESIDUAL' : [],
        'END/SEC DEP' : [],
        'SALES TAX' : [],
        'INVENT CHANGE' : [],
        'NET RESERVE' : [],
        'LATE CHGS' : [],
        'CUSTOMER NAME' : [],
        'UNEARNED FIN' : [],
        'UNAMORT RES' : [],
        'MISC' : [],
        'MISC TAX' : [],
        'CASH RECEIVED' : [],
        'RCV OFFSET' : [],
        'GAIN/LOSS' : [],
        'DISPOSITION CODE' : [],
        # 'DISPOSITION DESC' is derived from 'DISPOSITION CODE' after extraction,
        # so it is not an extracted key here
        'UNEARNED IDC' : [],
        'UNPAID INT' : [],
        'PENALTY FEE' : [],
        'UNPAID ACCRD' : [],
        'RENEWAL RCVBL' : [],
        'DEF REN INC' : [],
        'DEF REN INT' : [],
        'EARNED IDC' : [],
        'GST BOOK G/L' : [],
        'UNRECOG GST' : [],
        'INT EARNED' : [],
        'OVER/SHORT' : [],
        'OPER RCVB' : [],
        'OPER BASIS' : [],
        'CTD OPER DEPR' : [],
    }
    # L0: BlendedInc 6
    # L1: Late CHGS 14
    # L2: Gain/Loss 22
    # L3: Def Ren Int 30
    # L4: Over/Short 35
    # L5: CTD OPER
    columns = list(extracted_data_dict.keys())
    # These line data are used to tell the data extractor which values to pull for each line of
    # relevant data. They pair dictionary keys with their corresponding data slot in the line
    # so that they can be iterated through during data extraction.
    line0 = list(zip(columns[0:7], [i for i in range(1,8)]))
    line1 = list(zip(columns[7:15], [i for i in range(0,8)]))
    line2 = list(zip(columns[15:23], [i for i in range(0,8)]))
    line3 = list(zip(columns[23:31], [i for i in range(0,8)]))
    line4 = list(zip(columns[31:36], [i for i in range(1,8) if i not in [3,6]]))
    line5 = list(zip(columns[36:], [i for i in range(1,4)]))
    data_extractor = create_line_divider([27,43,58,74,88,105,120])
    for line in enumerate(lines):
        if (re.search(contract_number_regex, data_extractor(0, line[1])) is not None) and \
           (type(data_extractor(1, line[1])) == float):
            data_section = lines[line[0]-1:line[0]+5]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[3])) for c in line3]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[4])) for c in line4]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[5])) for c in line5]

    df = pd.DataFrame(extracted_data_dict)
    # The Accounting team wanted the disposition code split into number and description, so...
    disp_code = []
    disp_description = []
    for d in df['DISPOSITION CODE'].to_list():
        disp_split = d.split(" ")
        disp_code.append(disp_split[0])
        disp_description.append(" ".join(disp_split[1:]))
    df["DISPOSITION CODE"] = disp_code
    df["DISPOSITION DESC"] = disp_description
    df.to_excel(save_name, index=False, engine="xlsxwriter")
    return df

# Works for Net-inv-loans & NIV-after
def net_invest_trial_balance(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
        'CUSTOMER NAME' : [],
        'CURR INT RCVB' : [],
        'UNEARNED BLENDED' : [],
        'BLEND NET INV' : [],
        'LEASE NUMBER' : [],
        'GROSS CONTRACT' : [],
        'CURR RENT RCVB' : [],
        'UNEARN FIN' : [],
        'END DEPOSIT' : [],
        'SEC DEPOSIT' : [],
        'LEASE PYMTS' : [],
        'TOTAL' : [],
        'CONTRACT STAT' : [],
        'PAYMENTS RCVD' : [],
        'REM RENT RCVB' : [],
        'UNEARN RESID' : [],
        'PROV LOSS' : [],
        'NET RESERVE' : [],
        'UNEARN INC' : [],
        'BAL REMAINING' : [],
        'RESIDUAL' : [],
        'UNPAID INT' : [],
        'NET INV' : [],
        'UNEARNED IDC' : [],
    }
    columns = list(extracted_data_dict.keys())
    line0 = list(zip(columns[0:4], [0,3,4,5]))
    line1 = list(zip(columns[4:12], [i for i in range(0,8)]))
    line2 = list(zip(columns[12:19], [i for i in range(0,7)]))
    line3 = list(zip(columns[19:], [i for i in range(1,6)]))

    data_extractor = create_line_divider([18,35,53,67,87,106,117])
    for line in enumerate(lines):
        slot1 = data_extractor(0, line[1], False)
        if type(slot1) != str: continue
        if re.search(contract_number_regex, slot1) is not None:
            data_section = lines[line[0]-1:line[0]+4]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[3])) for c in line3]
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

def lockbox(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
        "CustomerName" : [],
        "PaymentDate" : [],
        "InvoiceNumber" : [],
        "CheckNumber" : [],
        "InvoicePayment" : [],
        "ContractNumber" : [],
        "ContractPayment" : [],
    }
    # These are lists of the dictionary columns/keys and the data slots in which
    # that data can be found in the report. This way we can iterate through them
    # while extracting data.
    bank_payment_records = [list(extracted_data_dict.keys())[1:5], [1,2,3,4]]
    infolease_payment_records = [list(extracted_data_dict.keys())[5:], [7,8]]

    # Below are the regular expressions used to find relevant data lines
    full_line = r"\d*\s{5}\d{2}/\d{2}/\d{4}\s{4}1"
    contract_only_line = r"\s{90}\d.{7}1\d{2}-"
    cust_name_line = r"\s{98}.{28}\D*"
    # The data extractor allows us to extract data from the report using slots.
    # Slots are ranges of characters denoted by the list fed into the creation function.
    data_extractor = create_line_divider([9,19,39,56,69,90,98,118])
    for line in enumerate(lines):
        # We can skip empty lines
        if len(line[1]) == 0: continue
        # First we should check if there is a full line of data (defined by regex)
        if re.search(full_line, line[1]):
            # If this is true then we can iterate through the lists we created earlier and append the data to our dict
            for k in range(0, len(bank_payment_records[0])):
                extracted_data_dict[bank_payment_records[0][k]].append(data_extractor(bank_payment_records[1][k], line[1]))
            for k in range(0, len(infolease_payment_records[0])):
                extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k], line[1]))
        # Otherwise we should check if this is a line with only contract data
        elif re.search(contract_only_line, line[1]):
            # If that's the case we can reuse the 'bank payment data' from the previous entry, since it should apply to this contract
            for k in range(0, len(bank_payment_records[0])):
                extracted_data_dict[bank_payment_records[0][k]].append(extracted_data_dict[bank_payment_records[0][k]][-1])
            for k in range(0, len(infolease_payment_records[0])):
                extracted_data_dict[infolease_payment_records[0][k]].append(data_extractor(infolease_payment_records[1][k], line[1]))
        # If it doesn't hit either of these criteria then continue, since it's irrelevant data
        else: continue
        # i tracks how many lines below the current line we're looking for the customer name.
        # Keep moving down a line and checking for a customer name.
        # The customer name typically appears 1 line under the data but can be 13 lines down if cut off by a page end.
        i = 1
        while re.search(cust_name_line, lines[line[0]+i]) is None:
            i += 1
        # Once it hits, add the name to the dict
        extracted_data_dict["CustomerName"].append(data_extractor(7, lines[line[0]+i]))
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

def minv(report: str, save_name: str):
    lines = report.splitlines()
    data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
    extracted_data_dict = {
        "ContractNumber" : [],
        "UTAB_OIC_DUE" : [],
        "RentalDue" : [],
        "UTAB_OIC_PYMT" : [],
        "ChargeType" : [],
        "OutstandBalance" : [],
        "BizSegment" : [],
        "BookingDate" : [],
        "Branch" : [],
    }
    columns = list(extracted_data_dict.keys())
    for line in enumerate(lines):
        if re.search(contract_number_regex, line[1]) is not None:
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1], debug=False)) for c in range(0, len(columns))]
    # All the list lengths need to be the same, so if anything was missed the DataFrame will fail to build
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

# Good for PUB_WIRES, VMCC, PBP_EPAY, returned check
def payment_transactions(report: str, save_name: str):
    lines = report.splitlines()
    data_extractor = create_line_divider([6,33,52,62,80,89,110,121])
    extracted_data_dict = {
        'SEQ' : [],
        'ACCOUNT NUMBER' : [],
        'PYMT METHOD' : [],
        'DATE RCVD' : [],
        'AMOUNT' : [],
        'REF NO': [],
        'PAYMENT MEMO' : [],
        'PYMT TYPE' : [],
        'CHECK NO' : [],
        'CUSTOMER NAME' : [],
        'TRANSACTIONS NUM': [],
        'INV NO' : [],
    }
    columns = list(extracted_data_dict.keys())
    transaction_num_regex = r"\d{8}"
    for line in enumerate(lines):
        slot1 = data_extractor(1, line[1], False)
        if type(slot1) != str: continue
        if re.search(contract_number_regex, slot1) is not None:
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns)-3)]
            tnum_match = re.search(transaction_num_regex, lines[line[0]+1])
            if tnum_match:
                tnum = lines[line[0]+1][tnum_match.start():tnum_match.end()]
            else:
                tnum = ""
            extracted_data_dict["TRANSACTIONS NUM"].append(tnum)
            cname = lines[line[0]+1][6:37].strip()
            extracted_data_dict['CUSTOMER NAME'].append(cname)
            inv_no = lines[line[0]+1][79:90].strip()
            extracted_data_dict['INV NO'].append(inv_no)
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

def renewal_net_invest_trial_balance(report: str, save_name: str):
    lines = report.splitlines()
    data_extractor = create_line_divider([21,29,43,58,71,88,99,113])
    extracted_data_dict = {
        'CUSTOMER NAME' : [],
        'TYPE' : [],
        'GROSS RENEWAL' : [],
        'CUR RENT RCVB' : [],
        'UNEARNED RIN' : [],
        'REMAINING RES' : [],
        'LEASE PYMTS' : [],
        'CONTRACT NUMBER' : [],
        'RENEWAL' : [],
        'PAYMENTS RCVD' : [],
        'REM RENT RCVB' : [],
        'UNPAID RES' : [],
        'SECURITY DEP' : [],
        'NET INVEST' : [],
        'UNEARN INCOME' : [],
        'TOTAL' : [],
        'REMAINING BAL' : [],
        'FINANCED RES' : [],
    }
    columns = list(extracted_data_dict.keys())
    line0 = list(zip(columns[0:7], [0,1,2,3,4,5,7]))
    line1 = list(zip(columns[7:16], [i for i in range(0,9)]))
    line2 = list(zip(columns[16:], [3,4]))

    for line in enumerate(lines):
        slot1 = data_extractor(0, line[1], False)
        if type(slot1) != str: continue
        if re.search(contract_number_regex, slot1) is not None:
            data_section = lines[line[0]-1:line[0]+4]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2]
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe

def unapplied(report: str, save_name: str):
    lines = report.splitlines()
    extracted_data_dict = {
        "Trans Num" : [],
        "ContractNumber" : [],
        "CheckNum" : [],
        "Date RCVD" : [],
        "Asset ID": [],
        "Reversed Amt" : [],
        "Branch" : [],
        "Unapplied Susp Acct" : [],
        "PaymentMemo" : [],
        "Payers Name" : [],
        "Batch Num" : [],
        "Posting Date" : [],
        "Unapplied Amt" : [],
        "Rev Post Date" : [],
        "Ref Num" : [],
        "Check Amt" : [],
        "Reason Code" : [],
    }
    columns = list(extracted_data_dict.keys())
    # Iterate through the lines one at a time to look for relevant data.
    # Use enumerate so that we know which line we're currently working on;
    # this lets us work with the report structure and grab the remaining
    # fields from the line following the data.
    data_extractor = create_line_divider([9,25,38,50,65,80,89,108])
    trans_num = r"\d{7}"
    for line in enumerate(lines):
        if (re.search(trans_num, str(data_extractor(0, line[1], debug=False))) is not None) and \
           (re.search(r"\d{2}/\d{2}/\d{4}", str(data_extractor(3, line[1], debug=False))) is not None):
            [extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, 9)]
            [extracted_data_dict[columns[8+c]].append(data_extractor(c, lines[line[0]+1])) for c in range(1, len(columns)-8)]
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False, engine="xlsxwriter")
    return dataframe