You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
233 lines
15 KiB
233 lines
15 KiB
import re
from logging import DEBUG, basicConfig, debug, warn, warning
from typing import Optional, Union

from pandas import DataFrame
|
|
|
|
|
|
#logConfig = basicConfig(filename='ILFormatter.log', encoding='utf-8', level=DEBUG, filemode='w')
|
|
# Pattern of a contract number: 3 digits, dash, 7 digits, dash, 3 digits.
# parse() uses it by default to decide which lines are data rows.
# Raw string so that "\d" reaches the regex engine untouched instead of being
# treated as an (invalid) string escape sequence.
CONTRACT_NO_REGEX = r"\d{3}-\d{7}-\d{3}"
|
|
|
|
class Column:
    """One fixed-width field of a text line.

    A column is the character slice ``[start, end)`` of a line. An ``end`` of
    ``-1`` is a sentinel meaning "through the end of the line".
    """

    def __init__(self, columnName: str, startIndex: int,
                 length: Optional[int] = None, endIndex: Optional[int] = None,
                 valueRegex: Optional[str] = None) -> None:
        """Describe a column by its start and either its length or its end.

        Args:
            columnName: Name returned alongside every extracted value.
            startIndex: Index of the first character of the column.
            length: Width of the column; only used when ``endIndex`` is None.
            endIndex: Exclusive end index, or ``-1`` for "to end of line".
                Takes precedence over ``length`` when both are given.
            valueRegex: Optional pattern the extracted text is expected to
                contain; a mismatch is logged, never raised.
        """
        assert length is not None or endIndex is not None, "You must specify either the length or endIndex of this column"
        self.name = columnName
        self.start = startIndex
        self.end = endIndex if endIndex is not None else startIndex + length
        self.valueRegex = valueRegex

    def __regex_check(self, value: str) -> bool:
        """Return True when no regex is configured or it is found in *value*."""
        if self.valueRegex is None:
            return True
        return re.search(self.valueRegex, value) is not None

    def extract_column(self, line: str) -> tuple[str, Union[str, float]]:
        """Extract this column's value from *line*.

        Commas are removed and surrounding whitespace is stripped. The text is
        then checked against ``valueRegex`` (a mismatch only logs a warning)
        and returned as a ``float`` whenever ``float()`` accepts it, otherwise
        as the cleaned string.

        Args:
            line: The full text line to slice.

        Returns:
            ``(column name, value)``. A line shorter than ``start`` yields an
            empty string, as does a line that cannot be sliced at all.
        """
        debug(line)
        try:
            # The sentinel is resolved inside the try so that a non-string
            # line is reported below instead of escaping as a TypeError.
            end = self.end if self.end != -1 else len(line)
            text = line[self.start:end].replace(',', '').strip()
        except (TypeError, AttributeError):
            warning(f"NO DATA VALUE PRESENT ({self.name} | {self.start}-{self.end}): {line}")
            # Fall back to an empty value so the code below has something to
            # work with (previously the name was left unbound here).
            text = ""
        if not self.__regex_check(text):
            warning(f"Invalid column value: Column: {self.name} value: {text} regex: {self.valueRegex}")
        try:
            return self.name, float(text)
        except ValueError:
            # Not numeric: keep the cleaned text.
            return self.name, text
|
|
|
|
|
|
# Fixed-width column layout (name, start index, exclusive end index, optional
# validation regex). An endIndex of -1 means "through the end of the line".
# Names are kept exactly as written, including trailing spaces, because
# parse() uses them as DataFrame column labels.
FIN_COLUMNS: list[Column] = [
    Column("CUST.ID", startIndex=0, endIndex=21, valueRegex=r"\d{8}"),
    # Uses the shared constant (previously the literal text "CONTRACT_NO_REGEX"
    # was passed as the pattern, so every contract number failed validation).
    Column("CONTRACT.NO", startIndex=21, endIndex=37, valueRegex=CONTRACT_NO_REGEX),
    Column("BUSINESS.TYPE", startIndex=37, endIndex=51, valueRegex=r"\d{2}"),
    Column("FED.ID", startIndex=51, endIndex=72, valueRegex=r"\d{9}"),
    Column("CUST.CREDIT.ACCT", startIndex=72, endIndex=89, valueRegex=r"\d+"),
    Column("CUSTOMER", startIndex=89, endIndex=120, valueRegex=None),
    Column("LEASE.TYPE", startIndex=120, endIndex=131, valueRegex=None),
    Column("EQUIPMENT.COST", startIndex=131, endIndex=146, valueRegex=None),
    Column("CBR.", startIndex=146, endIndex=161, valueRegex=None),
    Column("NET.INVESTMENT", startIndex=161, endIndex=176, valueRegex=None),
    Column("ANNUAL.COMBINED.IRR", startIndex=176, endIndex=185, valueRegex=None),
    Column("CONTRACT.TERM", startIndex=185, endIndex=199, valueRegex=None),
    Column("INCOME.START.DATE", startIndex=199, endIndex=217, valueRegex=None),
    Column("FIRST.PYMT.DATE", startIndex=217, endIndex=233, valueRegex=None),
    Column("FIRST.PYMT.AMT", startIndex=233, endIndex=248, valueRegex=None),
    Column("CONTRACT.PYMT.", startIndex=248, endIndex=263, valueRegex=None),
    Column("INVOICE.CODE", startIndex=263, endIndex=276, valueRegex=None),
    Column("INV.DAYS", startIndex=276, endIndex=285, valueRegex=None),
    Column("INV.DUE.DAY", startIndex=285, endIndex=297, valueRegex=None),
    Column("SEC.DEPOSIT.", startIndex=297, endIndex=312, valueRegex=None),
    Column("IDC.AMOUNTS.", startIndex=312, endIndex=327, valueRegex=None),
    Column("IDC.DATES.", startIndex=327, endIndex=338, valueRegex=None),
    Column("RESIDUAL", startIndex=338, endIndex=353, valueRegex=None),
    Column("MANAGERS.RESIDUAL", startIndex=353, endIndex=371, valueRegex=None),
    Column("PROMOTION", startIndex=371, endIndex=381, valueRegex=None),
    Column("PRODUCT.LINE", startIndex=381, endIndex=394, valueRegex=None),
    Column("REGION", startIndex=394, endIndex=401, valueRegex=None),
    Column("REGION.DESC.", startIndex=401, endIndex=432, valueRegex=None),
    Column("BRANCH", startIndex=432, endIndex=439, valueRegex=None),
    Column("BUSINESS.SEGMENT", startIndex=439, endIndex=456, valueRegex=None),
    Column("LEAD.BANK", startIndex=456, endIndex=466, valueRegex=None),
    Column("MRKTNG.REP", startIndex=466, endIndex=477, valueRegex=None),
    Column("MRKTNG.REGION", startIndex=477, endIndex=491, valueRegex=None),
    Column("REMIT.TO", startIndex=491, endIndex=500, valueRegex=None),
    Column("PYMT.OPTION", startIndex=500, endIndex=512, valueRegex=None),
    Column("BANK.CODE", startIndex=512, endIndex=522, valueRegex=None),
    Column("TAPE.BANK.NUM", startIndex=522, endIndex=536, valueRegex=None),
    Column("TAPE.ACCOUNT.NUM", startIndex=536, endIndex=557, valueRegex=None),
    Column("TAPE.ACCT.TYPE", startIndex=557, endIndex=572, valueRegex=None),
    Column("DEALER", startIndex=572, endIndex=583, valueRegex=None),
    Column("PRIVATE.LABEL", startIndex=583, endIndex=597, valueRegex=None),
    Column("RESID.METHOD", startIndex=597, endIndex=610, valueRegex=None),
    Column("LATE.CHRG.EXMPT", startIndex=610, endIndex=626, valueRegex=None),
    Column("INSURANCE.CODE", startIndex=626, endIndex=641, valueRegex=None),
    Column("VARIABLE.DATE", startIndex=641, endIndex=655, valueRegex=None),
    Column("VARIABLE.RATE", startIndex=655, endIndex=671, valueRegex=None),
    Column("BILLING.CYCLE", startIndex=671, endIndex=685, valueRegex=None),
    Column("UM.USER.DATE2", startIndex=685, endIndex=699, valueRegex=None),
    Column("CR.ATTG.PHONE", startIndex=699, endIndex=715, valueRegex=None),
    Column("GROSS.CONTRACT", startIndex=715, endIndex=730, valueRegex=None),
    Column("ADV", startIndex=730, endIndex=734, valueRegex=None),
    # NOTE(review): every other column starts where the previous one ends, but
    # index 734 is skipped here (ADV ends at 734, next starts at 735). Confirm
    # against a sample report whether that is intentional.
    Column("PD.AMT.FINANCED ", startIndex=735, endIndex=751, valueRegex=None),
    Column("PD.INCOME.START.DATE ", startIndex=751, endIndex=772, valueRegex=None),
    Column("INVOICE.DESC", startIndex=772, endIndex=792, valueRegex=None),
    Column("VARIABLE.PYMT.CODE ", startIndex=792, endIndex=811, valueRegex=None),
    Column("PD.PAYMENT.AMT ", startIndex=811, endIndex=826, valueRegex=None),
    Column("QUOTE.BUYOUT ", startIndex=826, endIndex=839, valueRegex=None),
    Column("LATE.CHARGE.CODE ", startIndex=839, endIndex=856, valueRegex=None),
    Column("LATE.CHRG.RATE ", startIndex=856, endIndex=871, valueRegex=None),
    Column("M.DEF.COLLECTOR ", startIndex=871, endIndex=887, valueRegex=None),
    Column("AM.ACH.LEAD.DAYS ", startIndex=887, endIndex=904, valueRegex=None),
    Column("UNL POOL", startIndex=904, endIndex=915, valueRegex=None),
    Column("PD RISK", startIndex=915, endIndex=926, valueRegex=None),
    Column("PD RISK DATE.", startIndex=926, endIndex=940, valueRegex=None),
    Column("LGD RISK", startIndex=940, endIndex=949, valueRegex=None),
    Column("LGD DATE", startIndex=949, endIndex=960, valueRegex=None),
    Column("Service By Others", startIndex=960, endIndex=-1, valueRegex=None),
]
|
|
|
|
# Fixed-width column layout, written as (name, start index, exclusive end
# index) rows. An end index of -1 means "through the end of the line"; no
# column in this layout has a validation regex. Names are kept exactly as
# written, including trailing spaces, because they become column labels.
ASSET_COLS: list[Column] = [
    Column(label, startIndex=first, endIndex=stop, valueRegex=None)
    for label, first, stop in (
        ("ASSET.#. ", 0, 9),
        ("CUST.ID. ", 9, 30),
        ("CONTRACT.NO ", 30, 46),
        ("CUST.CREDIT.ACCT ", 46, 63),
        ("CUST.NAME. ", 63, 84),
        ("EQUIP.DESC ", 84, 125),
        ("QUANTITY ", 125, 134),
        ("NEW.USED ", 134, 143),
        ("MODEL. ", 143, 164),
        ("A.MANUFACTURER.YEAR ", 164, 184),
        ("SERIAL.NUMBER. ", 184, 205),
        ("EQUIP.CODE ", 205, 216),
        ("EQUIP.CODE.DESC. ", 216, 247),
        ("ASSET.VENDOR ", 247, 260),
        ("ASSET.VENDOR.NAME. ", 260, 291),
        ("MANUFACTURER ", 291, 304),
        ("MANUFACT.NAME. ", 304, 335),
        ("UATB.EQUIP.ADDR1.45 ", 335, 381),
        ("UATB.EQUIP.ADDR2.45 ", 381, 427),
        ("EQUIP.CITY. ", 427, 453),
        ("EQUIP.STATE ", 453, 465),
        ("EQUIP.ZIP. ", 465, 476),
        ("STATE.TAX.CODE ", 476, 491),
        ("CNTY.TAX.CODE ", 491, 505),
        ("CITY.TAX.CODE ", 505, 519),
        ("PROP.STATUS ", 519, 531),
        ("EQUIP.COST ", 531, 546),
        ("EQUIP.COST.PCT ", 546, 561),
        ("PUR.OPTION ", 561, 572),
        ("PUR.OPTION. ", 572, 588),
        ("AS.RECOURSE.CODE ", 588, 605),
        ("RESID.AMT. ", 605, 620),
        ("BEG.DEPR.DATE ", 620, 634),
        ("OPER.LS.BEGIN.DATE ", 634, 653),
        ("OPER.LS.LIM ", 653, 665),
        ("OPER.LS.SALVAGE ", 665, -1),
    )
]
|
|
|
|
# Fixed-width column layout, written as (name, start index, exclusive end
# index) rows. An end index of -1 means "through the end of the line"; no
# column in this layout has a validation regex. Names are kept exactly as
# written, including trailing spaces, because they become column labels.
CUST_COLS: list[Column] = [
    Column(label, startIndex=first, endIndex=stop, valueRegex=None)
    for label, first, stop in (
        ("CONTRACT.NO ", 0, 16),
        ("CUST.CREDIT.ACCT ", 16, 33),
        ("CUST.ID. ", 33, 54),
        ("CUST.NAME. ", 54, 105),
        ("UATB.CUST.DBA. ", 105, 136),
        ("UATB.CUST.ADDRESS1.45 ", 136, 182),
        ("UATB.CUST.ADDRESS2.45 ", 182, 228),
        ("UATB.CUST.ADDRESS3.45 ", 228, 274),
        ("CUST.CITY. ", 274, 295),
        ("CUST.STATE ", 295, 306),
        ("CUST.ZIP ", 306, 317),
        ("GUAR.CODE.1 ", 317, 329),
        ("PRIN1/GUAR.NAME.1. ", 329, 365),
        ("PRIN1.ADD1. ", 365, 396),
        ("PRIN1.ADD2. ", 396, 427),
        ("PRIN1.CITY1. ", 427, 453),
        ("PRIN1.ST.1. ", 453, 464),
        ("ZIP.1. ", 464, 477),
        ("FED.ID/SS#1 ", 477, 503),
        ("GUAR.CODE.2.PRIN/GUAR.NAME.2. ", 503, 541),
        ("PRIN2.ADD2. ", 541, 572),
        ("PRIN2.ADDR2 ", 572, 603),
        ("PRIN2.CITY2. ", 603, 629),
        ("PRIN2.ST.2ZIP.2. ", 629, 653),
        ("FED.ID/SS#2 ", 653, 679),
        ("BILLING.NAME ", 679, 720),
        ("UATB.AR.ADDRESS1.45 ", 720, 766),
        ("UATB.AR.ADDRESS2.45 ", 766, 812),
        ("UATB.AR.ADDRESS3.45 ", 812, 858),
        ("AR.CITY. ", 858, 879),
        ("AR.STATE ", 879, 888),
        ("AR.ZIP ", 888, 899),
        ("AR.ATTN. ", 899, 920),
        ("UATB.CR.ATTG.NAME40. ", 920, 961),
        ("CR.SCORING ", 961, 972),
        ("FACILITY.SCORE ", 972, 988),
        ("SIC.CODE ", 988, -1),
    )
]
|
|
|
|
# Fixed-width column layout, written as (name, start index, exclusive end
# index) rows. An end index of -1 means "through the end of the line"; no
# column in this layout has a validation regex. Names are kept exactly as
# written, including trailing spaces, because they become column labels.
DOB_COL: list[Column] = [
    Column(label, startIndex=first, endIndex=stop, valueRegex=None)
    for label, first, stop in (
        ("CONTRACT.NO ", 0, 16),
        ("CUST.CREDIT.ACCT ", 16, 33),
        ("CUST.ID. ", 33, 54),
        ("GUAR.CODE.1 ", 54, 66),
        ("PRIN/GUAR.NAME.1. ", 66, 102),
        ("FED.ID/SS#1 ", 102, 128),
        ("DOB1 ", 128, 139),
        ("GUAR.CODE.2 ", 139, 151),
        ("PRIN/GUAR.NAME.2. ", 151, 177),
        ("FED.ID/SS#2 ", 177, -1),
    )
]
|
|
|
|
def parse(ILOutput: str, columns: list[Column], dataColumnRegex: str = CONTRACT_NO_REGEX) -> DataFrame:
    """Turn fixed-width report text into a DataFrame.

    Only lines in which ``dataColumnRegex`` is found are treated as data rows;
    every other line is skipped. For each data row, every column in
    ``columns`` is extracted and appended to the list kept under that
    column's name.

    Args:
        ILOutput: Full text of the report.
        columns: Column layout applied to every data row.
        dataColumnRegex: Pattern a line must contain to count as a data row.

    Returns:
        A DataFrame with one row per data line, labelled by the column names.
        When no line matches, the DataFrame is empty and has no columns.

    Raises:
        ValueError: Propagated from the DataFrame constructor when the
            collected columns cannot be combined. The per-column lengths are
            logged at debug level first.
    """
    debug(ILOutput)
    # Compiled once; the pattern is the same for every line.
    rowPattern = re.compile(dataColumnRegex)
    dataDict: dict = {}
    for index, line in enumerate(ILOutput.splitlines()):
        debug(f"Index: {index} | {line}")
        match = rowPattern.search(line)
        debug(match)
        if match is None:
            continue
        for col in columns:
            name, value = col.extract_column(line)
            debug(f"name: {name} | value: {value}")
            dataDict.setdefault(name, []).append(value)
    debug(dataDict)
    try:
        return DataFrame(dataDict)
    except ValueError as ve:
        debug({name: len(values) for name, values in dataDict.items()})
        debug(ve)
        # Re-raise the real error; previously execution fell through to a
        # return of a variable that had never been assigned.
        raise
|
|
|
|
|
|
# extracts = [("FIN", FIN_COLUMNS), ("ASSET", ASSET_COLS), ("CUST", CUST_COLS), ("DOB", DOB_COL)]
|
|
|
|
# for file, columns in extracts:
|
|
# with open(f"Inputs/{file}", errors="replace") as reportFile:
|
|
# report: str = reportFile.read()
|
|
# # Removes characters that cause errors
|
|
# report: str = report.replace("^"," ")
|
|
# dataframe: DataFrame = parse(ILOutput=report, columns=columns)
|
|
# print(f"{file} dataframe: {dataframe}") |