Fully working app. Interface needs work. Could use config to remember last opened location. Debug option not configured.
commit
6b375ecb72
@ -0,0 +1,6 @@ |
||||
venv/ |
||||
Inputs/ |
||||
Outputs/ |
||||
__pycache__/ |
||||
*.csv |
||||
*.log |
||||
@ -0,0 +1,20 @@ |
||||
import re |
||||
import pandas as pd |
||||
|
||||
# For each raw report, find the first long line, record every token on it
# together with its start/end offsets, and dump the result to a CSV file.
for report_name in ["ASSET", "CUST", "DOB"]:
    # errors="replace" substitutes undecodable bytes instead of raising.
    with open(f"Inputs/{report_name}", errors="replace") as reportFile:
        report: str = reportFile.read()
    # Removes characters that cause errors
    report = report.replace("^", " ")
    colDict = {"ColName": [], "start": [], "end": []}
    for line in report.splitlines():
        print(line.strip())
        # Presumably the first line wider than 50 characters is the column
        # header row; only that line is scanned -- TODO confirm.
        if len(line.strip()) > 50:
            # A token is a run of word characters / dots followed by one
            # whitespace character (which is included in the match).
            for match in re.finditer(r'(\w|\.)+\s', line):
                print(match)
                colDict["ColName"].append(match.group())
                colDict["start"].append(match.start())
                colDict["end"].append(match.end())
            break
    pd.DataFrame(colDict).to_csv(f"Outputs/{report_name}.csv")
||||
@ -0,0 +1,141 @@ |
||||
import re
from logging import DEBUG, basicConfig, debug, warn, warning
from typing import Optional, Union

from pandas import DataFrame
||||
|
||||
|
||||
|
||||
|
||||
# Send every log record (DEBUG and above) to a UTF-8 encoded log file.
# NOTE: basicConfig() returns None, so logConfig is always None; the name is
# kept only so that existing references to it do not break.
logConfig = basicConfig(filename='ILFormatter.log', encoding='utf-8', level=DEBUG)

# Locations of the report files used when running this module directly.
# Forward slashes are used for both so the paths work on every platform.
TEST_FIN_LOCATION = r"Inputs/FIN"
TEST_ASSET_LOCATION = r"Inputs/ASSET"

# Three digits, seven digits, three digits, separated by hyphens.
# Raw string so the backslashes reach the regex engine unchanged.
CONTRACT_NO_REGEX = r"\d{3}-\d{7}-\d{3}"
||||
|
||||
class Column:
    """A named, fixed-position slice of a report line.

    Args:
        columnName: Name returned alongside every extracted value.
        startIndex: Index of the first character of the column (inclusive).
        length: Width of the column; only used when ``endIndex`` is omitted.
        endIndex: Index one past the last character (exclusive). The value
            ``-1`` means "up to the end of the line". Takes precedence over
            ``length`` when both are given.
        valueRegex: Optional pattern the extracted text is checked against
            with ``re.search``; a mismatch is logged, the value is still
            returned.

    Raises:
        ValueError: If neither ``length`` nor ``endIndex`` is supplied.
    """

    def __init__(self, columnName: str, startIndex: int,
                 length: Optional[int] = None, endIndex: Optional[int] = None,
                 valueRegex: Optional[str] = None) -> None:
        # Explicit raise instead of assert: asserts are stripped under -O.
        if length is None and endIndex is None:
            raise ValueError("You must specify either the length or endIndex of this column")
        self.name = columnName
        self.start = startIndex
        self.end = endIndex if endIndex is not None else startIndex + length
        self.valueRegex = valueRegex

    def __regex_check(self, value: str) -> bool:
        """Return True when no regex is configured or ``value`` contains a match."""
        if self.valueRegex is None:
            return True
        return re.search(self.valueRegex, value) is not None

    def extract_column(self, line: str) -> tuple[str, Union[str, float]]:
        """Extract this column's value from ``line``.

        Commas are removed and surrounding whitespace is stripped. If the
        cleaned text can be converted with ``float()`` the float is
        returned, otherwise the text itself.

        Args:
            line: One full line of the report.

        Returns:
            A ``(column name, value)`` pair.

        Raises:
            ValueError: If ``line`` is shorter than the column's end index.
        """
        debug(line)
        if self.end == -1:
            # Sentinel: the column runs to the end of the line.
            end = len(line)
        else:
            if len(line) < self.end:
                raise ValueError(
                    f"Line is too short to extract value: len: {len(line)} < end: {self.end}"
                )
            end = self.end
        text: str = line[self.start:end].replace(',', '').strip()
        # Validation runs on the cleaned text, before any numeric conversion.
        if not self.__regex_check(text):
            warning(f"Invalid column value: Column: {self.name} value: {text} regex: {self.valueRegex}")
        try:
            return self.name, float(text)
        except ValueError:
            # Not numeric: keep the cleaned text as-is.
            return self.name, text
||||
|
||||
|
||||
# Column layout used to parse the FIN report: each entry gives the column
# name, its [startIndex, endIndex) character span, and an optional regex the
# extracted text is validated against. endIndex=-1 means "to end of line".
# NOTE(review): several names carry a trailing space and "ADV" ends at 734
# while the next column starts at 735 -- kept as-is, confirm against the report.
FIN_COLUMNS: list[Column] = [
    Column("CUST.ID", startIndex=0, endIndex=21, valueRegex=r"\d{8}"),
    # Uses the shared constant; a quoted "CONTRACT_NO_REGEX" would be treated
    # as the pattern itself and never match a contract number.
    Column("CONTRACT.NO", startIndex=21, endIndex=37, valueRegex=CONTRACT_NO_REGEX),
    Column("BUSINESS.TYPE", startIndex=37, endIndex=51, valueRegex=r"\d{2}"),
    Column("FED.ID", startIndex=51, endIndex=72, valueRegex=r"\d{9}"),
    Column("CUST.CREDIT.ACCT", startIndex=72, endIndex=89, valueRegex=r"\d+"),
    Column("CUSTOMER", startIndex=89, endIndex=120, valueRegex=None),
    Column("LEASE.TYPE", startIndex=120, endIndex=131, valueRegex=None),
    Column("EQUIPMENT.COST", startIndex=131, endIndex=146, valueRegex=None),
    Column("CBR.", startIndex=146, endIndex=161, valueRegex=None),
    Column("NET.INVESTMENT", startIndex=161, endIndex=176, valueRegex=None),
    Column("ANNUAL.COMBINED.IRR", startIndex=176, endIndex=185, valueRegex=None),
    Column("CONTRACT.TERM", startIndex=185, endIndex=199, valueRegex=None),
    Column("INCOME.START.DATE", startIndex=199, endIndex=217, valueRegex=None),
    Column("FIRST.PYMT.DATE", startIndex=217, endIndex=233, valueRegex=None),
    Column("FIRST.PYMT.AMT", startIndex=233, endIndex=248, valueRegex=None),
    Column("CONTRACT.PYMT.", startIndex=248, endIndex=263, valueRegex=None),
    Column("INVOICE.CODE", startIndex=263, endIndex=276, valueRegex=None),
    Column("INV.DAYS", startIndex=276, endIndex=285, valueRegex=None),
    Column("INV.DUE.DAY", startIndex=285, endIndex=297, valueRegex=None),
    Column("SEC.DEPOSIT.", startIndex=297, endIndex=312, valueRegex=None),
    Column("IDC.AMOUNTS.", startIndex=312, endIndex=327, valueRegex=None),
    Column("IDC.DATES.", startIndex=327, endIndex=338, valueRegex=None),
    Column("RESIDUAL", startIndex=338, endIndex=353, valueRegex=None),
    Column("MANAGERS.RESIDUAL", startIndex=353, endIndex=371, valueRegex=None),
    Column("PROMOTION", startIndex=371, endIndex=381, valueRegex=None),
    Column("PRODUCT.LINE", startIndex=381, endIndex=394, valueRegex=None),
    Column("REGION", startIndex=394, endIndex=401, valueRegex=None),
    Column("REGION.DESC.", startIndex=401, endIndex=432, valueRegex=None),
    Column("BRANCH", startIndex=432, endIndex=439, valueRegex=None),
    Column("BUSINESS.SEGMENT", startIndex=439, endIndex=456, valueRegex=None),
    Column("LEAD.BANK", startIndex=456, endIndex=466, valueRegex=None),
    Column("MRKTNG.REP", startIndex=466, endIndex=477, valueRegex=None),
    Column("MRKTNG.REGION", startIndex=477, endIndex=491, valueRegex=None),
    Column("REMIT.TO", startIndex=491, endIndex=500, valueRegex=None),
    Column("PYMT.OPTION", startIndex=500, endIndex=512, valueRegex=None),
    Column("BANK.CODE", startIndex=512, endIndex=522, valueRegex=None),
    Column("TAPE.BANK.NUM", startIndex=522, endIndex=536, valueRegex=None),
    Column("TAPE.ACCOUNT.NUM", startIndex=536, endIndex=557, valueRegex=None),
    Column("TAPE.ACCT.TYPE", startIndex=557, endIndex=572, valueRegex=None),
    Column("DEALER", startIndex=572, endIndex=583, valueRegex=None),
    Column("PRIVATE.LABEL", startIndex=583, endIndex=597, valueRegex=None),
    Column("RESID.METHOD", startIndex=597, endIndex=610, valueRegex=None),
    Column("LATE.CHRG.EXMPT", startIndex=610, endIndex=626, valueRegex=None),
    Column("INSURANCE.CODE", startIndex=626, endIndex=641, valueRegex=None),
    Column("VARIABLE.DATE", startIndex=641, endIndex=655, valueRegex=None),
    Column("VARIABLE.RATE", startIndex=655, endIndex=671, valueRegex=None),
    Column("BILLING.CYCLE", startIndex=671, endIndex=685, valueRegex=None),
    Column("UM.USER.DATE2", startIndex=685, endIndex=699, valueRegex=None),
    Column("CR.ATTG.PHONE", startIndex=699, endIndex=715, valueRegex=None),
    Column("GROSS.CONTRACT", startIndex=715, endIndex=730, valueRegex=None),
    Column("ADV", startIndex=730, endIndex=734, valueRegex=None),
    Column("PD.AMT.FINANCED ", startIndex=735, endIndex=751, valueRegex=None),
    Column("PD.INCOME.START.DATE ", startIndex=751, endIndex=772, valueRegex=None),
    Column("INVOICE.DESC", startIndex=772, endIndex=792, valueRegex=None),
    Column("VARIABLE.PYMT.CODE ", startIndex=792, endIndex=811, valueRegex=None),
    Column("PD.PAYMENT.AMT ", startIndex=811, endIndex=826, valueRegex=None),
    Column("QUOTE.BUYOUT ", startIndex=826, endIndex=839, valueRegex=None),
    Column("LATE.CHARGE.CODE ", startIndex=839, endIndex=856, valueRegex=None),
    Column("LATE.CHRG.RATE ", startIndex=856, endIndex=871, valueRegex=None),
    Column("M.DEF.COLLECTOR ", startIndex=871, endIndex=887, valueRegex=None),
    Column("AM.ACH.LEAD.DAYS ", startIndex=887, endIndex=904, valueRegex=None),
    Column("UNL POOL", startIndex=904, endIndex=915, valueRegex=None),
    Column("PD RISK", startIndex=915, endIndex=926, valueRegex=None),
    Column("PD RISK DATE.", startIndex=926, endIndex=940, valueRegex=None),
    Column("LGD RISK", startIndex=940, endIndex=949, valueRegex=None),
    Column("LGD DATE", startIndex=949, endIndex=960, valueRegex=None),
    Column("Service By Others", startIndex=960, endIndex=-1, valueRegex=None),
]
||||
|
||||
def parse(ILOutput: str, columns: list[Column], dataColumnRegex: str = CONTRACT_NO_REGEX) -> DataFrame:
    """Build a DataFrame from the data rows of a fixed-width report.

    A line counts as a data row when ``dataColumnRegex`` matches somewhere in
    it; every other line is skipped. Each data row is passed to every column's
    ``extract_column`` and the returned values are collected per column name.

    Args:
        ILOutput: Full text of the report.
        columns: Column definitions applied to every data row.
        dataColumnRegex: Pattern that identifies a data row.

    Returns:
        A DataFrame with one column per extracted name and one row per data
        line. It has no columns at all when no line matches.
    """
    debug(ILOutput)
    # Compile once instead of searching with the raw pattern twice per line.
    row_pattern = re.compile(dataColumnRegex)
    dataDict: dict[str, list] = {}
    for index, line in enumerate(ILOutput.splitlines()):
        debug(f"Index: {index} | {line}")
        found = row_pattern.search(line)
        debug(found)
        if found is None:
            continue
        for col in columns:
            name, value = col.extract_column(line)
            # setdefault creates the list on first use; no exception handling
            # is needed, so extraction errors are never masked.
            dataDict.setdefault(name, []).append(value)
    return DataFrame(dataDict)
||||
|
||||
|
||||
|
||||
|
||||
# Read the FIN report, parse it with the FIN column layout and print the result.
# errors="replace" substitutes undecodable bytes instead of raising.
with open(TEST_FIN_LOCATION, errors="replace") as reportFile:
    # Removes characters that cause errors
    report: str = reportFile.read().replace("^", " ")
finDataframe: DataFrame = parse(columns=FIN_COLUMNS, ILOutput=report)
print(f"FIN dataframe: {finDataframe}")
||||
Loading…
Reference in new issue