You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
604 lines
27 KiB
604 lines
27 KiB
import os
|
|
import pandas as pd
|
|
from datetime import datetime as dt, timedelta
|
|
import sys, getopt
|
|
import re
|
|
from pathlib import Path
|
|
import time
|
|
import numpy as np
|
|
|
|
# Contract numbers are a common feature in many reports, so it's
# useful to have the regex for them globally available.
# Raw string so "\d" is a regex digit class, not an invalid string escape
# (non-raw "\d" raises SyntaxWarning/DeprecationWarning on modern Python).
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
|
|
|
|
class ILReport:
    """
    InfoLease Report class used to work with the report files.

    It makes it easier to add new reports to the workflow and to make it more
    clear where the reports are coming from. It also helps with tracking
    reports that may not be ready yet.
    """

    def __init__(self, location, extraction_function, output_location=None):
        """
        Parameters
        ----------
        location : str | Path
            Where the InfoLease report text file is stored.
        extraction_function : callable
            ``f(report_text, output_location) -> pd.DataFrame`` used to pull
            the data out of the report.
        output_location : str | Path, optional
            Where the extracted output is saved.  Defaults to the directory
            containing the input report.
        """
        # The location where the InfoLease report is stored
        self.location = location
        # If output location not specified, save next to the input file
        if output_location is None:
            self.output_location = Path(location).parent.absolute()
        else:
            self.output_location = output_location
        # The function used to extract the data from the report
        self.x_method = extraction_function
        # Tracks whether the data was successfully extracted
        self.successful = False

    def process(self):
        """
        Read the report file and run the extraction method on it.

        Returns the extracted dataframe on success, or ``1`` on failure
        (file unreadable, extraction raised, or the result was empty).
        ``self.successful`` reflects the outcome either way.
        """
        try:
            # errors='replace' deals with non UTF-8 characters
            # (no effect on the extracted output)
            with open(self.location, errors="replace") as ifile:
                report = ifile.read()
        except IOError as ioe:
            print(f"Failed to open file: {self.location}\n{ioe}")
            self.successful = False
            return 1
        try:
            # Run the associated method to extract the data and get the dataframe
            dataframe = self.x_method(report, self.output_location)
        except Exception as e:
            # BUG FIX: the original printed self.output_name, an attribute that
            # is never set, so the error report itself raised AttributeError.
            print(f"Failed to create dataframe: {self.output_location}\n{e}")
            self.successful = False
            return 1
        # Explicit check instead of assert: asserts are stripped under -O,
        # which would have silently skipped this validation.
        if len(dataframe) <= 1:
            print(f"Data Length Error: {self.output_location} is empty:\n{dataframe}")
            self.successful = False
            return 1
        # BUG FIX: the original never set successful = True on the happy path,
        # so the flag was useless for tracking completed reports.
        self.successful = True
        return dataframe
|
|
|
|
|
|
def create_line_divider(breakage_list: list):
    """
    Build a slot-based line extractor from a list of column break points.

    Example
    -------
    Given breakage_list ``[10, 20, 30]``, slot 0 of the resulting
    ``extract_line_slot`` yields characters 0-10 of the string, slot 1
    yields characters 10-20, and the final slot (``len(breakage_list)``)
    yields everything from the last break point to the end of the line.

    Returns
    -------
    callable
        ``extract_line_slot(slot_num, line_string, debug=False)``
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pull data from a line/string using the break points captured from
        the enclosing ``create_line_divider`` call.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'.
        Numeric fields are automatically converted to floats; everything
        else is returned as a stripped string.
        """
        # Valid slots are 0 .. len(breakage_list) inclusive -- the last slot
        # runs from the final break point to the end of the line.
        assert slot_num < len(breakage_list) + 1
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        # The ',' must be removed so thousands-separated numbers parse as floats
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not numeric -- keep the stripped string as-is.
            # (Original used a bare except, which would also have hidden
            # unrelated errors such as KeyboardInterrupt.)
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
|
|
|
|
|
|
######################################################################################################################
|
|
# #
|
|
# EXTRACTION FUNCTIONS: used to pull data out of specific InfoLease report types #
|
|
# #
|
|
######################################################################################################################
|
|
"""
|
|
COMMON EXTRACTION COMPONENTS/FEATURES:
|
|
- lines = report.splitlines() : splits the reports into a list of lines (based on \n line breaks in document)
|
|
|
|
- extracted_data_dict : this is a dictionary that will hold the extracted data and will be used to create the dataframe
|
|
|
|
- columns = list(extracted_data_dict.keys()) : breaks the extracted_data_dict into a list of its keys (excel column heads)
|
|
|
|
- data_extractor = create_line_divider([#,#,#,#,#]): This creates a function we can use to pull data from a line based on
|
|
its 'slot position'. A slot position is the characters between the numbers specified in the list passed into the function
|
|
|
|
- for line in enumerate(lines): iterates through each line in the document. Line is a tuple of (line number, line string)
|
|
having the line number can be very useful when we need to access data in adjacent lines
|
|
|
|
- line# = list(zip(columns[#:#],[i for i in range(#,#)])): This creates a list with the tuple (column name, slot number).
|
|
It allows us to iterate through this list and make sure the correct data slots are being used for each column/key in the
|
|
data dictionary
|
|
|
|
COMMON REGEX COMPONENTS
|
|
\d : any digit [0-9]
|
|
\D : any character that is not a digit
|
|
\s : whitespace
|
|
. : any character besides newline (\n)
|
|
{#}: # number of the preceding character
|
|
* : 0 or more repetitions of the preceding character
|
|
"""
|
|
|
|
|
|
def ach(report: str, save_name: str):
    """
    Extract ACH payment data from an InfoLease ACH report.

    Parameters
    ----------
    report : str
        Full text of the InfoLease ACH report.
    save_name : str | Path
        Path the Excel workbook is written to ('data' + 'Summary' sheets).

    Returns
    -------
    pd.DataFrame
        One row per contract payment found in the report.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber": [],
        "CustomerName": [],
        "BankCode": [],
        "BankNumber": [],
        "AccountNumber": [],
        "Payment": [],
        "Batch": [],
        "Lessor": [],
        "PaymentDate": [],
    }
    columns = list(extracted_data_dict.keys())
    # Per-batch summary: contracts summed by their lessor code.
    # A feature requested by cash apps.
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "total": [],
    }

    data_extractor = create_line_divider([19, 57, 67, 82, 104])
    # Raw strings so "\d" is a regex digit class, not an invalid escape.
    bank_number_regex = r"\d{9}"
    batch_num_regex = r"BATCH \d{4} TOTAL"
    for line_num, text in enumerate(lines):
        # A data line must contain both a contract number and a 9-digit bank number
        if re.search(contract_number_regex, text) is not None and re.search(bank_number_regex, text) is not None:
            # The first six columns line up with the extractor's slots in order;
            # Batch/Lessor/PaymentDate (last 3 keys) are back-filled below.
            for c in range(0, len(columns) - 3):
                extracted_data_dict[columns[c]].append(data_extractor(c, text))
        # A batch-total line closes out the current batch
        if re.search(batch_num_regex, text) is not None:
            # Batch number is always in characters 96 to 101
            batches["batch_num"].append(text[96:101])
            # Payment date is 2 lines below, between characters 114 and 125
            batches["payment_date"].append(lines[line_num + 2][114:125])
            # Lessor is just the first three digits of the contract number
            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
            # Batch total as given by the report; ',' removed so it parses as a float
            batches["total"].append(float(text[107:125].strip().replace(",", "")))
            # Back-fill Batch/Lessor/PaymentDate for every contract row collected
            # since the previous batch-total line (rows without these values yet).
            for key, value in (("Batch", batches["batch_num"][-1]),
                               ("Lessor", batches["lessor"][-1]),
                               ("PaymentDate", batches["payment_date"][-1])):
                missing = len(extracted_data_dict["BankCode"]) - len(extracted_data_dict[key])
                extracted_data_dict[key].extend([value] * missing)

    # The dictionary lists are now equal lengths, so a dataframe can be built
    dataframe = pd.DataFrame(extracted_data_dict)
    # Two sheets (data & Summary) require an ExcelWriter; this also helps
    # with a bug caused by larger dataframes.
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        # The batches dictionary becomes its own summary sheet
        pd.DataFrame(batches).to_excel(writer, index=False, sheet_name="Summary")
    return dataframe
|
|
|
|
def disposition(report: str, save_name: str):
    """
    Extract contract disposition data from an InfoLease disposition report.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path the Excel output is written to.

    Returns
    -------
    pd.DataFrame
        One row per disposed contract.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "ContractNumber": [],
        "Amount Rec": [],
        "Trans Num": [],
        "Date RCVD": [],
        "Date Posted": [],
        "Last Pymt Due": [],
        "Date Due": [],
        "Residual Amt": [],
        "Term Date": [],
        "Total Pastdue": [],
        "Customer Name": [],
    }
    columns = list(extracted_data_dict.keys())
    data_extractor = create_line_divider([15, 32, 41, 51, 61, 79, 88, 103, 114])
    for line_num, text in enumerate(lines):
        slot0 = data_extractor(0, text)
        # BUG FIX: slot 0 comes back as a float for purely-numeric fields,
        # which crashed re.search with a TypeError.  Skip non-string slots,
        # consistent with net_invest_trial_balance.
        if not isinstance(slot0, str):
            continue
        if re.search(contract_number_regex, slot0):
            for c in range(0, len(columns) - 1):
                extracted_data_dict[columns[c]].append(data_extractor(c, text))
            # Customer name is on a separate line so it is grabbed separately
            extracted_data_dict["Customer Name"].append(lines[line_num + 1].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
|
|
|
|
|
|
def gainloss(report: str, save_name: str):
    """
    Extract gain/loss disposition data from an InfoLease gain/loss report.

    Each record spans six physical report lines; dictionary keys are paired
    with their data slot on each line so extraction can be looped.  This is
    one of the messiest reports.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path the Excel output is written to.

    Returns
    -------
    pd.DataFrame
        One row per disposed contract, with DISPOSITION CODE split into a
        numeric code column and a DISPOSITION DESC description column.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        'REM RENT RCVB': [],
        'GUAR RESIDUAL': [],
        'ASSET VAL': [],
        'EQUITY ADDON': [],
        'CURR INT RCVB': [],
        'MISC G/L': [],
        'BLENDED INC': [],
        'CONTRACT NUMBER': [],
        'CURR RENT RCVB': [],
        'RESIDUAL': [],
        'END/SEC DEP': [],
        'SALES TAX': [],
        'INVENT CHANGE': [],
        'NET RESERVE': [],
        'LATE CHGS': [],
        'CUSTOMER NAME': [],
        'UNEARNED FIN': [],
        'UNAMORT RES': [],
        'MISC': [],
        'MISC TAX': [],
        'CASH RECEIVED': [],
        'RCV OFFSET': [],
        'GAIN/LOSS': [],
        'DISPOSITION CODE': [],
        # BUG FIX: the original had a stray 'DISPOSITION DESC' string here with
        # no value, which Python implicitly concatenated with the next key into
        # one bogus column 'DISPOSITION DESCUNEARNED IDC'.  The slot-map
        # comments and the assignment at the bottom show the data collected
        # here is UNEARNED IDC; DISPOSITION DESC is derived after extraction.
        'UNEARNED IDC': [],
        'UNPAID INT': [],
        'PENALTY FEE': [],
        'UNPAID ACCRD': [],
        'RENEWAL RCVBL': [],
        'DEF REN INC': [],
        'DEF REN INT': [],
        'EARNED IDC': [],
        'GST BOOK G/L': [],
        'UNRECOG GST': [],
        'INT EARNED': [],
        'OVER/SHORT': [],
        'OPER RCVB': [],
        'OPER BASIS': [],
        'CTD OPER DEPR': [],
    }
    # Record layout (last key on each physical line):
    # L0: BLENDED INC | L1: LATE CHGS | L2: GAIN/LOSS
    # L3: DEF REN INT | L4: OVER/SHORT | L5: CTD OPER DEPR
    columns = list(extracted_data_dict.keys())
    # Pair dictionary keys with their corresponding data slot on each of the
    # six report lines, so they can be iterated during extraction.  It looks
    # confusing but makes more sense next to the actual InfoLease reports.
    line0 = list(zip(columns[0:7], range(1, 8)))
    line1 = list(zip(columns[7:15], range(0, 8)))
    line2 = list(zip(columns[15:23], range(0, 8)))
    line3 = list(zip(columns[23:31], range(0, 8)))
    line4 = list(zip(columns[31:36], [i for i in range(1, 8) if i not in [3, 6]]))
    line5 = list(zip(columns[36:], range(1, 4)))
    data_extractor = create_line_divider([27, 43, 58, 74, 88, 105, 120])
    for line_num, text in enumerate(lines):
        slot0 = data_extractor(0, text)
        # Guard: slot 0 can be a float for numeric fields, which would crash
        # re.search (consistent with the other extraction functions).
        if not isinstance(slot0, str):
            continue
        # The anchor line must contain a contract number, and the second
        # data slot must be numeric.
        if re.search(contract_number_regex, slot0) is not None and isinstance(data_extractor(1, text), float):
            # The record's six data lines start one line above the anchor
            data_section = lines[line_num - 1:line_num + 5]
            for mapping, section_line in zip((line0, line1, line2, line3, line4, line5), data_section):
                for key, slot in mapping:
                    extracted_data_dict[key].append(data_extractor(slot, section_line))

    df = pd.DataFrame(extracted_data_dict)
    # The Accounting team wanted the disposition code split into number and
    # description, so split on the first space.
    disp_code = []
    disp_description = []
    for d in df['DISPOSITION CODE'].to_list():
        disp_split = d.split(" ")
        disp_code.append(disp_split[0])
        disp_description.append(" ".join(disp_split[1:]))
    df["DISPOSITION CODE"] = disp_code
    df["DISPOSITION DESC"] = disp_description
    df.to_excel(save_name, index=False)
    return df
|
|
|
|
# Works for Net-inv-loans & NIV-after
|
|
def net_invest_trial_balance(report: str, save_name: str):
    """
    Extract data from a net-investment trial-balance report.

    Works for Net-inv-loans & NIV-after.  Each record spans four physical
    report lines; occasional random blank lines inside a record are detected
    (a data line without a '.') and shifted out before extraction.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path the Excel output ('data' + per-lessor 'Summary') is written to.

    Returns
    -------
    pd.DataFrame
        One row per lease.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        'CUSTOMER NAME': [],
        'CURR INT RCVB': [],
        'UNEARNED BLENDED': [],
        'BLEND NET INV': [],
        'LEASE NUMBER': [],
        'GROSS CONTRACT': [],
        'CURR RENT RCVB': [],
        'UNEARN FIN': [],
        'END DEPOSIT': [],
        'SEC DEPOSIT': [],
        'LEASE PYMTS': [],
        'TOTAL': [],
        'CONTRACT STAT': [],
        'PAYMENTS RCVD': [],
        'REM RENT RCVB': [],
        'UNEARN RESID': [],
        'PROV LOSS': [],
        'NET RESERVE': [],
        'UNEARN INC': [],
        'BAL REMAINING': [],
        'RESIDUAL': [],
        'UNPAID INT': [],
        'NET INV': [],
        'UNEARNED IDC': [],
        "LESSOR": [],
    }
    lessors = []
    columns = list(extracted_data_dict.keys())
    # Pair dictionary keys with their data slot on each of the four record lines
    line0 = list(zip(columns[0:4], [0, 3, 4, 5]))
    line1 = list(zip(columns[4:12], range(0, 8)))
    line2 = list(zip(columns[12:19], range(0, 7)))
    line3 = list(zip(columns[19:-1], range(1, 6)))
    # (leftover debug print of the line mappings removed)

    data_extractor = create_line_divider([18, 32, 50, 66, 84, 100, 117])
    for line_num, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is not None:
            data_section = lines[line_num - 1:line_num + 3]
            # The IL report sometimes has random blank lines; a dead giveaway
            # of an empty line in a data section is a line without a '.'.
            # Check the first data line and move it back if empty.
            if data_section[0].find(".") == -1:
                data_section[0] = lines[line_num - 2]
            # Now go through each relevant data line and make sure it isn't blank
            for ds_idx, ds_text in enumerate(data_section):
                if ds_text.find(".") == -1:
                    if ds_idx < len(data_section) - 1:
                        # Shift every data line after the blank one up a slot
                        for i in range(ds_idx, len(data_section) - 1):
                            data_section[i] = data_section[i + 1]
                        # The last data line goes 'out-of-bounds' of the
                        # existing selection, so pull it in explicitly
                        data_section[3] = lines[line_num + 3]
                    else:
                        data_section[3] = lines[line_num + 3]
            # With the data section repaired, extract the data
            for mapping, section_line in zip((line0, line1, line2, line3), data_section):
                for key, slot in mapping:
                    extracted_data_dict[key].append(data_extractor(slot, section_line, False))
            # Lessor is the first three digits of the lease number
            extracted_data_dict["LESSOR"].append(extracted_data_dict["LEASE NUMBER"][-1][0:3])
            # Track newly-seen lessors for the summary tab
            if extracted_data_dict["LESSOR"][-1] not in lessors:
                lessors.append(extracted_data_dict["LESSOR"][-1])

    dataframe = pd.DataFrame(extracted_data_dict)

    summary_series = []
    for lessor in lessors:
        # .copy() + .drop() instead of del-on-a-slice: the original mutated a
        # view of `dataframe` (SettingWithCopyWarning / undefined behavior).
        # String columns are dropped since they can't be summed.
        reduced_df = dataframe.loc[dataframe["LESSOR"] == lessor].copy()
        reduced_df = reduced_df.drop(columns=["CUSTOMER NAME", "LEASE NUMBER", "CONTRACT STAT"])
        # np.nan -- np.NaN was removed in NumPy 2.0
        reduced_df = reduced_df.replace("", np.nan)
        # 'REVOLVING ACCOUNT' can sometimes be printed over part of the data;
        # just get rid of it (it arrives split across two slots).
        reduced_df = reduced_df.replace("REVOLV", np.nan)
        reduced_df = reduced_df.replace("ING ACCOUNT", np.nan)
        summation = reduced_df.sum(skipna=True, axis=0)
        summation["LESSOR"] = lessor
        summation["CONTRACT COUNT"] = len(reduced_df.index)
        summary_series.append(summation)
    summary_df = pd.concat(summary_series, axis=1).transpose().set_index("LESSOR")
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        pd.DataFrame(summary_df).to_excel(writer, index=True, sheet_name="Summary")
    return dataframe
|
|
|
|
|
|
def lockbox(report: str, save_name: str):
    """
    Extract lockbox payment data, matching bank records to InfoLease payments.

    A bank record line can be followed by extra InfoLease-payment-only lines
    (one bank record split across several contracts); those inherit the bank
    columns of the preceding complete line.  The customer name lives on a
    following line, sometimes carried over to the next report page.

    Parameters
    ----------
    report : str
        Full text of the lockbox report.
    save_name : str | Path
        Path the Excel output is written to.

    Returns
    -------
    pd.DataFrame
        One row per (bank record, InfoLease payment) pairing.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "SEQ" : [],
        "PYMT DATE" : [],
        "INV NUM" : [],
        "CHECK NUMBER" : [],
        "PAYMENT AMOUNT" : [],
        "NOTE" : [],
        "IL SEQ" : [],
        "CONTRACT NUM" : [],
        "IL PAYMENT AMOUNT" : [],
        "CUST NAME" : [],
    }
    columns = list(extracted_data_dict.keys())
    data_extractor = create_line_divider([9,19,39,56,69,89,98,118])
    for line in enumerate(lines):
        match = False
        # Try to find the first SEQ # & a contract payment date e.i. ' 197 05/10/2022'
        if re.match("(\s|\d){3}\d{1}\s{5}\d{2}/\d{2}/\d{4}", line[1]):
            match = True
            # Add all of the data points except customer name
            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns)-1)]
        # Check to see if this line contains only an infolease payment
        # Some times there are multiple infolease payments for a single bank record
        elif re.search(contract_number_regex, line[1]) != None:
            match = True
            # If there is then we can add the same data as the previous complete line
            # (duplicates the bank-record columns 0-5 of the row above)
            [extracted_data_dict[columns[c]].append(extracted_data_dict[columns[c]][-1]) for c in range(0,6)]
            # Then add the new data for the infolease contract
            [extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(6,len(columns)-1)]
        # If we had a match we need a customer name to associate with it
        # Sometimes these can appear on the next page hense the while loop searching for a match
        if match:
            # We can tell the cust name will be on the next page if the word "PAGE" appears three lines under the current line
            # And the next line is blank
            if (lines[line[0]+1].strip() == "") & (lines[line[0]+3].find("PAGE") != -1):
                i = 0
                # Look for a bunch of whitespace then some writing
                # (98 leading spaces then a 34-char name field).
                # NOTE(review): this scan starts at the current line and walks
                # forward with no explicit bound -- presumably a matching name
                # line always exists before end-of-report; an unterminated page
                # would raise IndexError here.  Verify against real reports.
                while not re.match("\s{98}.{34}", lines[line[0]+i]):
                    i +=1
                # Once we find it add the cust name to the dict (it's the only thing on the line)
                extracted_data_dict["CUST NAME"].append(lines[line[0]+i].strip())
            # if the condition above isnt met then the cust name is on the next line (even if that line is blank)
            else:
                extracted_data_dict["CUST NAME"].append(lines[line[0]+1].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
|
|
|
|
|
|
def minv(report: str, save_name: str, exclude_booking_date: str = '04/26/2022'):
    """
    Extract contract numbers from an MINV report, filtered to rows worth
    following up on.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path of the plain-text output file (one contract number per line).
    exclude_booking_date : str, optional
        Rows booked on this date are excluded from the output.  Defaults to
        the originally hard-coded '04/26/2022' for backward compatibility
        (presumably a conversion/booking date -- confirm with the report
        owner before relying on the default).

    Returns
    -------
    pd.DataFrame
        The filtered rows.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([15, 32, 52, 71, 83, 107, 116, 128])
    extracted_data_dict = {
        "ContractNumber": [],
        "UTAB_OIC_DUE": [],
        "RentalDue": [],
        "UTAB_OIC_PYMT": [],
        "ChargeType": [],
        "OutstandBalance": [],
        "BizSegment": [],
        "BookingDate": [],
        "Branch": [],
    }
    columns = list(extracted_data_dict.keys())
    for line_num, text in enumerate(lines):
        if re.search(contract_number_regex, text) is not None:
            for c in range(0, len(columns)):
                extracted_data_dict[columns[c]].append(data_extractor(c, text, debug=False))
    dataframe = pd.DataFrame(extracted_data_dict)
    # Keep rows not booked on the excluded date that either have rent due, or
    # have no rent due but an outstanding balance over 100.
    not_excluded = dataframe["BookingDate"] != exclude_booking_date
    filtered = dataframe[
        (not_excluded & (dataframe["RentalDue"] > 0)) |
        (not_excluded & (dataframe["RentalDue"] == 0) & (dataframe["OutstandBalance"] > 100))]
    # Output is a plain text file with one contract number per line
    with open(save_name, 'w') as output:
        for contract in filtered['ContractNumber'].to_list():
            output.write(f"{contract}\n")
    return filtered
|
|
|
|
# Good for PUB_WIRES, VMCC, PBP_EPAY, returned check
|
|
def payment_transactions(report: str, save_name: str):
    """
    Extract payment transaction data.

    Good for PUB_WIRES, VMCC, PBP_EPAY, and returned-check reports.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path the Excel output is written to.

    Returns
    -------
    pd.DataFrame
        One row per payment transaction.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([6, 33, 52, 62, 80, 89, 110, 121])
    extracted_data_dict = {
        'SEQ': [],
        'ACCOUNT NUMBER': [],
        'PYMT METHOD': [],
        'DATE RCVD': [],
        'AMOUNT': [],
        'REF NO': [],
        'PAYMENT MEMO': [],
        'PYMT TYPE': [],
        'CHECK NO': [],
        'CUSTOMER NAME': [],
        'TRANSACTIONS NUM': [],
        'INV NO': [],
    }
    columns = list(extracted_data_dict.keys())
    # Raw strings so "\d" is a regex digit class, not an invalid escape.
    transaction_num_regex = r"\d{8}"
    # Some accounts use a dotted number format instead of a contract number.
    dotted_account_regex = r"\d{3}\.\d{4}\.\d{4}"
    for line_num, text in enumerate(lines):
        slot1 = data_extractor(1, text, False)
        # Slot 1 can parse as a float; re.search would crash on it
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) or re.search(dotted_account_regex, slot1):
            # First 9 columns come from the current line's slots
            for c in range(0, len(columns) - 3):
                extracted_data_dict[columns[c]].append(data_extractor(c, text))
            # CUSTOMER NAME / TRANSACTIONS NUM / INV NO live on the next line.
            # Guarded so a data row on the report's final line can't IndexError.
            next_line = lines[line_num + 1] if line_num + 1 < len(lines) else ""
            tnum_match = re.search(transaction_num_regex, next_line)
            tnum = tnum_match.group(0) if tnum_match else ""
            extracted_data_dict["TRANSACTIONS NUM"].append(tnum)
            extracted_data_dict['CUSTOMER NAME'].append(next_line[6:37].strip())
            extracted_data_dict['INV NO'].append(next_line[79:90].strip())
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
|
|
|
|
|
|
def renewal_net_invest_trial_balance(report: str, save_name: str):
    """
    Extract data from a renewal net-investment trial-balance report.

    Each record spans three physical report lines; as in
    net_invest_trial_balance, random blank lines inside a record (a data line
    without a '.') are detected and shifted out before extraction.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path the Excel output is written to.

    Returns
    -------
    pd.DataFrame
        One row per renewed contract.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([21,29,43,58,71,88,99,113])
    extracted_data_dict = {
        'CUSTOMER NAME' : [],
        'TYPE' : [],
        'GROSS RENEWAL' : [],
        'REMAINING BAL' : [],
        'FINANCED RES' : [],
        'REMAINING RES' : [],
        'LEASE PYMTS' : [],
        'CONTRACT NUMBER' : [],
        'RENEWAL' : [],
        'PAYMENTS RCVD' : [],
        'CUR RENT RCVB' : [],
        'UNEARNED RIN' : [],
        'SECURITY DEP' : [],
        'NET INVEST' : [],
        'UNEARN INCOME' : [],
        'TOTAL' : [],
        'REM RENT RCVB' : [],
        'UNPAID RES' : [],
    }
    columns = list(extracted_data_dict.keys())
    # Pair dictionary keys with their data slot on each of the three record lines
    line0 = list(zip(columns[0:7], [0,1,2,3,4,5,7]))
    line1 = list(zip(columns[7:16], [i for i in range(0,9)]))
    line2 = list(zip(columns[16:], [3,4]))

    for line in enumerate(lines):
        slot1 = data_extractor(0,line[1],False)
        # Slot 0 can parse as a float; re.search would crash on it
        if type(slot1) != str : continue
        if re.search(contract_number_regex, slot1) != None:
            # The record's three data lines start one line above the anchor
            data_section = lines[line[0]-1:line[0]+2]
            # SEE net_invest_trial_balance FOR EXPLAINATION
            # (blank-line repair: a data line without a '.' is treated as blank)
            if data_section[0].find(".") == -1:
                data_section[0] = lines[line[0]-2]
            for ds in enumerate(data_section):
                if ds[1].find(".") == -1:
                    if ds[0] < len(data_section) -1:
                        # Shift the remaining data lines up over the blank one
                        for i in range(ds[0], len(data_section)-1):
                            data_section[i] = data_section[i+1]
                        # Pull in the line beyond the original selection
                        data_section[2] = lines[line[0]+2]
                    else:
                        data_section[2] = lines[line[0]+2]

            # Extract each line's slots into the paired dictionary keys
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1]
            [extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2]
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
|
|
|
|
|
|
def unapplied(report: str, save_name: str):
    """
    Extract unapplied/reversed payment data from an unapplied-cash report.

    Iterates line by line (with the line index) so that the second half of
    each record can be read from the line following the match.

    Parameters
    ----------
    report : str
        Full text of the report.
    save_name : str | Path
        Path the Excel output is written to.

    Returns
    -------
    pd.DataFrame
        One row per unapplied payment.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        "Trans Num": [],
        "ContractNumber": [],
        "CheckNum": [],
        "Date RCVD": [],
        "Asset ID": [],
        "Reversed Amt": [],
        "Branch": [],
        "Unapplied Susp Acct": [],
        "PaymentMemo": [],
        "Payers Name": [],
        "Batch Num": [],
        "Posting Date": [],
        "Unapplied Amt": [],
        "Rev Post Date": [],
        "Ref Num": [],
        "Check Amt": [],
        "Reason Code": [],
    }
    columns = list(extracted_data_dict.keys())
    data_extractor = create_line_divider([9, 25, 38, 50, 65, 80, 89, 108])
    # Raw strings so "\d" is a regex digit class, not an invalid escape.
    # (The original defined trans_num but then re-wrote the literal inline.)
    trans_num_regex = r"\d{7}"
    date_regex = r"\d{2}/\d{2}/\d{4}"
    for line_num, text in enumerate(lines):
        # A data row has a 7-digit transaction number in slot 0 and a date in
        # slot 3.  str() because the extractor may return a float.
        if (re.search(trans_num_regex, str(data_extractor(0, text, debug=False))) is not None
                and re.search(date_regex, str(data_extractor(3, text, debug=False))) is not None):
            # First 9 columns come from the current line...
            for c in range(0, 9):
                extracted_data_dict[columns[c]].append(data_extractor(c, text))
            # ...the remaining columns from slots 1+ of the following line
            for c in range(1, len(columns) - 8):
                extracted_data_dict[columns[8 + c]].append(data_extractor(c, lines[line_num + 1]))
    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe