Redid the column finding/extraction to be more flexible and less error-prone
Added documentation
master
= 3 years ago
parent cee42cccf5
commit 51edca51cf
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 2
      .gitignore
  2. 171
      IL Formatter.py
  3. 284
      ILParser.py
  4. 50
      PortfolioParser.spec
  5. 2
      config.json
  6. 6
      todo.txt

2
.gitignore vendored

@ -2,6 +2,8 @@ venv/
Inputs/
Outputs/
__pycache__/
build/
dist/
*.csv
*.log
.vscode/

@ -13,18 +13,28 @@ from time import sleep
# Open the config file, create a dict, and set up logging
with open("config.json") as configFile:
    # Load exactly once: a second load() on the same handle starts at EOF and raises JSONDecodeError.
    # Besides "loggingLevel" the config also carries "directories" and "COLS", hence the plain dict annotation.
    config: dict = load(configFile)
# filemode='w' starts a fresh log file on every run; force=True replaces any handlers already installed.
basicConfig(filename='ILFormatter.log', encoding='utf-8', level=config["loggingLevel"], filemode='w', force=True)
info(f"Starting with log level: {getLogger().level}")
# Change the current log level and save the change to config.json
def change_log_level(newLevel: Literal["ERROR", "WARNING", "INFO", "DEBUG"]):
    """
    Persist a new logging level to config.json and apply it to the root logger.

    Args:
        newLevel (Literal["ERROR", "WARNING", "INFO", "DEBUG"]): The logging level to switch to.
    """
    # Record the choice in the in-memory config, then write the whole config back to disk
    config["loggingLevel"] = newLevel
    with open("config.json", 'w') as handle:
        dump(config, handle)

    # Apply the level to the root logger
    rootLogger = getLogger()
    rootLogger.setLevel(newLevel)

    # Echo the level now in effect (the logger reports it as a number) to the console
    print(f"{now()} | New logging level: {getLogger().level}\n")
# Creates an error dialog pop up
# Based on the ui from errorDialog.py
def open_error_dialog(errorLabel: str, errorDescription: str, errorText: str):
@ -80,31 +90,55 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.debugAction.triggered.connect(lambda: self._switch_log_levels(self.debugAction))
def _check_files(self):
    """
    Checks if all the required files have been loaded and the output location has been specified.
    Enables or disables the open button and process button accordingly.
    """
    # Log the paths of the loaded files for debugging
    debug(self.assetFile)
    debug(self.custFile)
    debug(self.dobFile)
    debug(self.finFile)

    # The open button is only re-enabled by a successful _process run
    self.openButton.setEnabled(False)

    # Ready only when every input file and the output location are set (identity check against None)
    required = (
        self.assetFile,
        self.custFile,
        self.dobFile,
        self.finFile,
        self.outputLocation,
    )
    ready = all(item is not None for item in required)

    # Enable or disable the process button based on readiness
    self.processButton.setEnabled(ready)
def _set_file(self, lineEdit: QtWidgets.QLineEdit, selfFile: Literal["ASSET", "CUST", "DOB", "FIN"]) -> Optional[str] :
def _set_file(self, lineEdit: QtWidgets.QLineEdit, selfFile: Literal["ASSET", "CUST", "DOB", "FIN"]) -> Optional[str]:
"""
Sets a file location based on user input using a file dialog.
Args:
lineEdit (QtWidgets.QLineEdit): The line edit object that displays the selected file location.
selfFile (Literal["ASSET", "CUST", "DOB", "FIN"]): The file type being selected.
Returns:
Optional[str]: The selected file location, or None if no file is selected.
"""
# Get the default location based on the file type
defaultLocation = self._default_location(selfFile)
# Show the file dialog and get the selected file path
selectedFile: list[str] = QtWidgets.QFileDialog.getOpenFileName(self, "OpenFile", directory=defaultLocation)
debug(f"Selected file: {selectedFile}")
# Set the text of the line edit to the selected file path
lineEdit.setText(selectedFile[0])
# Save the selected file location for future reference
file = selectedFile[0] if selectedFile[0] != '' else None
if file!= None:
self._default_location(selfFile, set=fileRoot(file))
# If the output location has not been set yet, set it to the file root of the selected file
if file != None and self.outputLocation == None:
# Output will may be from memory, or based on file root
self._auto_output_set(fileRootStr=fileRoot(file))
# Set the correct file variable based on the file type
if selfFile == "ASSET":
self.assetFile = file
elif selfFile == "CUST":
@ -113,23 +147,49 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.dobFile = file
elif selfFile == "FIN":
self.finFile = file
# Check if all required files have been selected and enable the "process" button if so
self._check_files()
def _set_output(self):
    """
    Set the output file location using a file dialog and update the UI accordingly.
    Also, update the default location for output in the settings.

    If the dialog is cancelled the output location is cleared (None), the line edit is emptied and the
    saved default location is left untouched.

    :return: None
    """
    # Show the dialog exactly once; calling it a second time just to test the result opens a second dialog
    selection = QtWidgets.QFileDialog.getSaveFileName(self, "Output file name")
    # PyQt5/PySide return (fileName, selectedFilter); tolerate bindings that return the bare file name
    if isinstance(selection, (tuple, list)):
        selection = selection[0]
    self.outputLocation = selection if selection else None

    # Update the output line edit in the UI with the selected file location or an empty string if no file was selected
    self.outputLE.setText(self.outputLocation if self.outputLocation is not None else '')

    # Only remember a directory when a file was actually chosen (there is no path to take the root of on cancel)
    if self.outputLocation is not None:
        self._default_location("output", set=fileRoot(self.outputLocation), overwrite=True)

    # Log the selected output location
    debug(f"Output Location: {self.outputLocation}")

    # Readiness depends on the output location, so refresh the buttons the same way _set_file does
    self._check_files()
def _default_location(self, file: Literal["ASSET", "CUST", "DOB", "FIN", "output"], set: str = None, overwrite: bool = False) -> Optional[str]:
# Gets the default location for the a file
# Set if set provided and default location is not set
# Will always overwrite the default location if overwrite is True
"""
Get or set the default location for a specified file.
If 'set' is provided and the default location is not set, or 'overwrite' is True, the default location will be updated.
:param file: The file type, one of ["ASSET", "CUST", "DOB", "FIN", "output"].
:param set: The new default location, if setting or updating it.
:param overwrite: Whether to overwrite the existing default location.
:return: The default location for the specified file.
"""
# Get the default location for the specified file
defaultLocation = config["directories"][file]
debug(f"Default location: {defaultLocation}")
if set != None:
debug(f"Setting default location to: {set} | ({(overwrite | (defaultLocation == None))})")
# Set or overwrite the default location if conditions are met
config["directories"][file] = set if (overwrite | (defaultLocation == None)) else defaultLocation
# Save the updated configuration
with open("config.json", 'w') as configFile:
dump(config, configFile)
return set
@ -137,96 +197,133 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
return defaultLocation
def _auto_output_set(self, fileRootStr):
    """
    Derive an output file path inside ``fileRootStr`` and show it in the UI.

    The file name is "Portfolio Contracts - <today as YYYY-MM-DD>.xlsx". The directory of the generated
    path is offered to the settings as the default output location without overwriting an existing one.

    :param fileRootStr: The directory the output file should be placed in.
    :return: None
    """
    # Build "<root>/Portfolio Contracts - <date>.xlsx"
    datedName = f"/Portfolio Contracts - {dt.now().strftime('%Y-%m-%d')}.xlsx"
    self.outputLocation = fileRootStr + datedName

    # Remember the directory as the default output location, keeping any value already stored
    outputRoot = fileRoot(self.outputLocation)
    self._default_location("output", set=outputRoot, overwrite=False)

    # Reflect the generated path in the output line edit (it is always a string at this point)
    self.outputLE.setText(self.outputLocation)
    debug(f"Auto set output: {self.outputLocation}")
def _open_with_default_app(self, item):
    """
    Open the specified item with its default application (e.g. Excel).

    :param item: The item to be opened with the default application.
    :return: None
    """
    debug(f"_open_with_default_app: {item}")
    # NOTE(review): startfile is presumably os.startfile, which only exists on Windows - confirm the import
    startfile(item)
def _switch_log_levels(self, newLevel: QtWidgets.QAction):
    """
    Make ``newLevel`` the active log-level action and apply the matching logging level.

    The new action becomes checked and disabled, the previously active one becomes unchecked and
    enabled, and the upper-cased text of the new action is passed to change_log_level.

    :param newLevel: The log level QAction that should become active.
    :return: None
    """
    # Remember which action was active before the switch
    previous: QtWidgets.QAction = self.logLevel
    print(f"{now()} | Log Level Changed: {previous.text()} -> {newLevel.text()}")

    # Flip the check marks first, then the enabled states (new action before old in both passes)
    for action, checked in ((newLevel, True), (previous, False)):
        action.setChecked(checked)
    for action, enabled in ((newLevel, False), (previous, True)):
        action.setEnabled(enabled)

    # Track the active action and push the level into the logging system
    self.logLevel = newLevel
    levelName = newLevel.text().upper()
    change_log_level(levelName)
def _parse_file(self, filePath: str) -> Optional[DataFrame]:
    """
    Parse a file at the given file path and extract data into a DataFrame using the ILParser.

    Every failure (unreadable file, parser error, missing or empty result) is logged and reported
    through an error dialog instead of being raised.

    :param filePath: The path to the file to be parsed.
    :return: A DataFrame containing the extracted data, or None if parsing failed or the DataFrame is empty.
    """
    debug(f"Parse Columns:\n{config['COLS']}")
    try:
        # Reading happens inside the try so a missing/unreadable file is reported like a parse failure
        with open(filePath) as file:
            report = file.read()
        debug(f"Report: {report}")
        data: DataFrame = ILParser.extract_data(report, config["COLS"])
    except Exception as e:
        logException(f"Failed to parse file-> {filePath} :\n{e}")
        open_error_dialog("Parsing Error:", f"Failed to parse file-> {filePath}", repr(e))
        return None
    debug(f"Data: {data}")
    # Treat "no result at all" the same as an empty table rather than crashing on None.empty
    if data is None or data.empty:
        error(f"Dataframe empty -> {filePath} | Returning none")
        open_error_dialog("Data Processing Error:", "Dataframe empty!", filePath)
        return None
    return data
def _process(self):
    """
    Process the input files, parse their data, and write the results to an Excel file.

    Inputs are parsed in the order ASSET, CUST, DOB, FIN. The first input that fails to parse is cleared
    (its line edit is emptied and its path reset to None) and processing stops without writing anything.
    On success each DataFrame is written to its own sheet (without the index column) and the open
    button is enabled.

    :return: None
    """
    # (sheet name, attribute holding the file path, attribute holding the matching line edit)
    inputs = (
        ("ASSET", "assetFile", "assetLE"),
        # NOTE(review): "custLe" is spelled differently from the other *LE widgets - confirm against the UI class
        ("CUST", "custFile", "custLe"),
        ("DOB", "dobFile", "dobLE"),
        ("FIN", "finFile", "finLE"),
    )

    frames = {}
    for sheet, fileAttr, lineEditAttr in inputs:
        frame = self._parse_file(getattr(self, fileAttr))
        debug(f"{sheet} DF: {frame} | {type(frame)}")
        if not isinstance(frame, DataFrame):
            # _parse_file already reported the problem; forget the bad file so the user must pick a new one
            getattr(self, lineEditAttr).setText("")
            setattr(self, fileAttr, None)
            # An input is missing again, so the process button must not stay enabled
            self._check_files()
            return None
        frames[sheet] = frame

    try:
        with ExcelWriter(self.outputLocation) as writer:
            # Sheet order in the workbook: FIN, CUST, ASSET, DOB
            for sheet in ("FIN", "CUST", "ASSET", "DOB"):
                frames[sheet].to_excel(writer, sheet_name=sheet, index=False)
    except Exception as e:
        logException(f"{now()} | Failed to write to excel -> {self.outputLocation} :\n{e}")
        open_error_dialog("Failed to Create Excel", f"Failed to write to excel -> {self.outputLocation}", repr(e))
        return None

    debug("Finished writing to excel.")
    self.openButton.setEnabled(True)
# Defines the app
app = QtWidgets.QApplication(argv)
# Sets the style

@ -1,233 +1,59 @@
import re
from logging import debug, DEBUG, basicConfig, warn, warning
from typing import Optional, Union

from pandas import DataFrame
#logConfig = basicConfig(filename='ILFormatter.log', encoding='utf-8', level=DEBUG, filemode='w')
# Pattern for a contract number: 3 digits, 7 digits, 3 digits, separated by hyphens.
# Raw string so that "\d" is a regex escape rather than an invalid string escape sequence.
CONTRACT_NO_REGEX = r"\d{3}-\d{7}-\d{3}"
class Column:
    """
    One fixed-width column of a report line.

    A column is the character range [start, end) of a line. An ``end`` of -1 means
    "up to the end of the line". An optional regex is used only to warn about
    unexpected values; it never rejects them.
    """

    def __init__(self, columnName: str, startIndex: int,
                 length: Optional[int] = None, endIndex: Optional[int] = None,
                 valueRegex: Optional[str] = None) -> None:
        """
        :param columnName: Name reported for values extracted from this column.
        :param startIndex: Index of the first character of the column.
        :param length: Width of the column; used only when endIndex is not given.
        :param endIndex: Index one past the last character, or -1 for "end of line".
        :param valueRegex: Optional pattern that extracted values are expected to contain.
        :raises ValueError: If neither length nor endIndex is given.
        """
        # Explicit raise instead of assert: asserts are stripped when Python runs with -O
        if length is None and endIndex is None:
            raise ValueError("You must specify either the length or endIndex of this column")
        self.name = columnName
        self.start = startIndex
        self.end = endIndex if endIndex is not None else startIndex + length
        self.valueRegex = valueRegex

    def __regex_check(self, value: str) -> bool:
        """Return True when no regex is configured or the regex is found in ``value``."""
        if self.valueRegex is None:
            return True
        return re.search(self.valueRegex, value) is not None

    def extract_column(self, line: str) -> tuple[str, Union[str, float]]:
        """
        Slice this column out of ``line``.

        Commas are removed and surrounding whitespace is stripped; the value is returned as a float
        when it converts cleanly and as the cleaned string otherwise.

        :param line: One data line of the report.
        :return: (column name, extracted value).
        """
        debug(line)
        # Slicing a str never raises, so a line that ends before this column has to be detected explicitly
        if len(line) <= self.start:
            warning(f"NO DATA VALUE PRESENT ({self.name} | {self.start}-{self.end}): {line}")
            return self.name, ''
        end = self.end if self.end != -1 else len(line)
        dataValue: Union[str, float] = line[self.start:end].replace(',', '').strip()
        if not self.__regex_check(dataValue):
            warning(f"Invalid column value: Column: {self.name} value: {dataValue} regex: {self.valueRegex}")
        try:
            dataValue = float(dataValue)
        except ValueError:
            # Not numeric: keep the cleaned text
            pass
        return self.name, dataValue
# Fixed-width layout of the FIN report: each Column covers characters [startIndex, endIndex) of a data
# line and endIndex=-1 means "to the end of the line". valueRegex (default None) only triggers a warning.
FIN_COLUMNS: list[Column] = [
    Column("CUST.ID", startIndex=0, endIndex=21, valueRegex=r"\d{8}"),
    # Use the shared pattern itself, not the literal text "CONTRACT_NO_REGEX"
    Column("CONTRACT.NO", startIndex=21, endIndex=37, valueRegex=CONTRACT_NO_REGEX),
    Column("BUSINESS.TYPE", startIndex=37, endIndex=51, valueRegex=r"\d{2}"),
    Column("FED.ID", startIndex=51, endIndex=72, valueRegex=r"\d{9}"),
    Column("CUST.CREDIT.ACCT", startIndex=72, endIndex=89, valueRegex=r"\d+"),
    Column("CUSTOMER", startIndex=89, endIndex=120),
    Column("LEASE.TYPE", startIndex=120, endIndex=131),
    Column("EQUIPMENT.COST", startIndex=131, endIndex=146),
    Column("CBR.", startIndex=146, endIndex=161),
    Column("NET.INVESTMENT", startIndex=161, endIndex=176),
    Column("ANNUAL.COMBINED.IRR", startIndex=176, endIndex=185),
    Column("CONTRACT.TERM", startIndex=185, endIndex=199),
    Column("INCOME.START.DATE", startIndex=199, endIndex=217),
    Column("FIRST.PYMT.DATE", startIndex=217, endIndex=233),
    Column("FIRST.PYMT.AMT", startIndex=233, endIndex=248),
    Column("CONTRACT.PYMT.", startIndex=248, endIndex=263),
    Column("INVOICE.CODE", startIndex=263, endIndex=276),
    Column("INV.DAYS", startIndex=276, endIndex=285),
    Column("INV.DUE.DAY", startIndex=285, endIndex=297),
    Column("SEC.DEPOSIT.", startIndex=297, endIndex=312),
    Column("IDC.AMOUNTS.", startIndex=312, endIndex=327),
    Column("IDC.DATES.", startIndex=327, endIndex=338),
    Column("RESIDUAL", startIndex=338, endIndex=353),
    Column("MANAGERS.RESIDUAL", startIndex=353, endIndex=371),
    Column("PROMOTION", startIndex=371, endIndex=381),
    Column("PRODUCT.LINE", startIndex=381, endIndex=394),
    Column("REGION", startIndex=394, endIndex=401),
    Column("REGION.DESC.", startIndex=401, endIndex=432),
    Column("BRANCH", startIndex=432, endIndex=439),
    Column("BUSINESS.SEGMENT", startIndex=439, endIndex=456),
    Column("LEAD.BANK", startIndex=456, endIndex=466),
    Column("MRKTNG.REP", startIndex=466, endIndex=477),
    Column("MRKTNG.REGION", startIndex=477, endIndex=491),
    Column("REMIT.TO", startIndex=491, endIndex=500),
    Column("PYMT.OPTION", startIndex=500, endIndex=512),
    Column("BANK.CODE", startIndex=512, endIndex=522),
    Column("TAPE.BANK.NUM", startIndex=522, endIndex=536),
    Column("TAPE.ACCOUNT.NUM", startIndex=536, endIndex=557),
    Column("TAPE.ACCT.TYPE", startIndex=557, endIndex=572),
    Column("DEALER", startIndex=572, endIndex=583),
    Column("PRIVATE.LABEL", startIndex=583, endIndex=597),
    Column("RESID.METHOD", startIndex=597, endIndex=610),
    Column("LATE.CHRG.EXMPT", startIndex=610, endIndex=626),
    Column("INSURANCE.CODE", startIndex=626, endIndex=641),
    Column("VARIABLE.DATE", startIndex=641, endIndex=655),
    Column("VARIABLE.RATE", startIndex=655, endIndex=671),
    Column("BILLING.CYCLE", startIndex=671, endIndex=685),
    Column("UM.USER.DATE2", startIndex=685, endIndex=699),
    Column("CR.ATTG.PHONE", startIndex=699, endIndex=715),
    Column("GROSS.CONTRACT", startIndex=715, endIndex=730),
    # NOTE(review): ADV ends at 734 but the next column starts at 735, leaving character 734 unread - confirm
    Column("ADV", startIndex=730, endIndex=734),
    Column("PD.AMT.FINANCED ", startIndex=735, endIndex=751),
    Column("PD.INCOME.START.DATE ", startIndex=751, endIndex=772),
    Column("INVOICE.DESC", startIndex=772, endIndex=792),
    Column("VARIABLE.PYMT.CODE ", startIndex=792, endIndex=811),
    Column("PD.PAYMENT.AMT ", startIndex=811, endIndex=826),
    Column("QUOTE.BUYOUT ", startIndex=826, endIndex=839),
    Column("LATE.CHARGE.CODE ", startIndex=839, endIndex=856),
    Column("LATE.CHRG.RATE ", startIndex=856, endIndex=871),
    Column("M.DEF.COLLECTOR ", startIndex=871, endIndex=887),
    Column("AM.ACH.LEAD.DAYS ", startIndex=887, endIndex=904),
    Column("UNL POOL", startIndex=904, endIndex=915),
    Column("PD RISK", startIndex=915, endIndex=926),
    Column("PD RISK DATE.", startIndex=926, endIndex=940),
    Column("LGD RISK", startIndex=940, endIndex=949),
    Column("LGD DATE", startIndex=949, endIndex=960),
    Column("Service By Others", startIndex=960, endIndex=-1),
]
# Fixed-width layout of the ASSET report: each Column covers characters [startIndex, endIndex) of a data
# line; endIndex = -1 means "to the end of the line" (see Column.extract_column).
ASSET_COLS: list[Column] = [
    Column("ASSET.#. ", startIndex= 0 ,endIndex = 9, valueRegex = None),
    Column("CUST.ID. ", startIndex= 9 ,endIndex = 30, valueRegex = None),
    Column("CONTRACT.NO ", startIndex= 30 ,endIndex = 46, valueRegex = None),
    Column("CUST.CREDIT.ACCT ", startIndex= 46 ,endIndex = 63, valueRegex = None),
    Column("CUST.NAME. ", startIndex= 63 ,endIndex = 84, valueRegex = None),
    Column("EQUIP.DESC ", startIndex= 84 ,endIndex = 125, valueRegex = None),
    Column("QUANTITY ", startIndex= 125 ,endIndex = 134, valueRegex = None),
    Column("NEW.USED ", startIndex= 134 ,endIndex = 143, valueRegex = None),
    Column("MODEL. ", startIndex= 143 ,endIndex = 164, valueRegex = None),
    Column("A.MANUFACTURER.YEAR ", startIndex= 164 ,endIndex = 184, valueRegex = None),
    Column("SERIAL.NUMBER. ", startIndex= 184 ,endIndex = 205, valueRegex = None),
    Column("EQUIP.CODE ", startIndex= 205 ,endIndex = 216, valueRegex = None),
    Column("EQUIP.CODE.DESC. ", startIndex= 216 ,endIndex = 247, valueRegex = None),
    Column("ASSET.VENDOR ", startIndex= 247 ,endIndex = 260, valueRegex = None),
    Column("ASSET.VENDOR.NAME. ", startIndex= 260 ,endIndex = 291, valueRegex = None),
    Column("MANUFACTURER ", startIndex= 291 ,endIndex = 304, valueRegex = None),
    Column("MANUFACT.NAME. ", startIndex= 304 ,endIndex = 335, valueRegex = None),
    Column("UATB.EQUIP.ADDR1.45 ", startIndex= 335 ,endIndex = 381, valueRegex = None),
    Column("UATB.EQUIP.ADDR2.45 ", startIndex= 381 ,endIndex = 427, valueRegex = None),
    Column("EQUIP.CITY. ", startIndex= 427 ,endIndex = 453, valueRegex = None),
    Column("EQUIP.STATE ", startIndex= 453 ,endIndex = 465, valueRegex = None),
    Column("EQUIP.ZIP. ", startIndex= 465 ,endIndex = 476, valueRegex = None),
    Column("STATE.TAX.CODE ", startIndex= 476 ,endIndex = 491, valueRegex = None),
    Column("CNTY.TAX.CODE ", startIndex= 491 ,endIndex = 505, valueRegex = None),
    Column("CITY.TAX.CODE ", startIndex= 505 ,endIndex = 519, valueRegex = None),
    Column("PROP.STATUS ", startIndex= 519 ,endIndex = 531, valueRegex = None),
    Column("EQUIP.COST ", startIndex= 531 ,endIndex = 546, valueRegex = None),
    Column("EQUIP.COST.PCT ", startIndex= 546 ,endIndex = 561, valueRegex = None),
    Column("PUR.OPTION ", startIndex= 561 ,endIndex = 572, valueRegex = None),
    Column("PUR.OPTION. ", startIndex= 572 ,endIndex = 588, valueRegex = None),
    Column("AS.RECOURSE.CODE ", startIndex= 588 ,endIndex = 605, valueRegex = None),
    Column("RESID.AMT. ", startIndex= 605 ,endIndex = 620, valueRegex = None),
    Column("BEG.DEPR.DATE ", startIndex= 620 ,endIndex = 634, valueRegex = None),
    Column("OPER.LS.BEGIN.DATE ", startIndex= 634 ,endIndex = 653, valueRegex = None),
    Column("OPER.LS.LIM ", startIndex= 653 ,endIndex = 665, valueRegex = None),
    # Last column: -1 lets it run to the end of each line
    Column("OPER.LS.SALVAGE ", startIndex= 665 ,endIndex = -1, valueRegex = None)
]
# Fixed-width layout of the CUST report: each Column covers characters [startIndex, endIndex) of a data
# line; endIndex = -1 means "to the end of the line" (see Column.extract_column).
CUST_COLS: list[Column] = [
    Column("CONTRACT.NO ", startIndex= 0 ,endIndex = 16, valueRegex = None),
    Column("CUST.CREDIT.ACCT ", startIndex= 16 ,endIndex = 33, valueRegex = None),
    Column("CUST.ID. ", startIndex= 33 ,endIndex = 54, valueRegex = None),
    Column("CUST.NAME. ", startIndex= 54 ,endIndex = 105, valueRegex = None),
    Column("UATB.CUST.DBA. ", startIndex= 105 ,endIndex = 136, valueRegex = None),
    Column("UATB.CUST.ADDRESS1.45 ", startIndex= 136 ,endIndex = 182, valueRegex = None),
    Column("UATB.CUST.ADDRESS2.45 ", startIndex= 182 ,endIndex = 228, valueRegex = None),
    Column("UATB.CUST.ADDRESS3.45 ", startIndex= 228 ,endIndex = 274, valueRegex = None),
    Column("CUST.CITY. ", startIndex= 274 ,endIndex = 295, valueRegex = None),
    Column("CUST.STATE ", startIndex= 295 ,endIndex = 306, valueRegex = None),
    Column("CUST.ZIP ", startIndex= 306 ,endIndex = 317, valueRegex = None),
    Column("GUAR.CODE.1 ", startIndex= 317 ,endIndex = 329, valueRegex = None),
    Column("PRIN1/GUAR.NAME.1. ", startIndex= 329 ,endIndex = 365, valueRegex = None),
    Column("PRIN1.ADD1. ", startIndex= 365 ,endIndex = 396, valueRegex = None),
    Column("PRIN1.ADD2. ", startIndex= 396 ,endIndex = 427, valueRegex = None),
    Column("PRIN1.CITY1. ", startIndex= 427 ,endIndex = 453, valueRegex = None),
    Column("PRIN1.ST.1. ", startIndex= 453 ,endIndex = 464, valueRegex = None),
    Column("ZIP.1. ", startIndex= 464 ,endIndex = 477, valueRegex = None),
    Column("FED.ID/SS#1 ", startIndex= 477 ,endIndex = 503, valueRegex = None),
    # NOTE(review): this single range appears to span two report fields (guarantor code 2 and name 2) - confirm
    Column("GUAR.CODE.2.PRIN/GUAR.NAME.2. ", startIndex= 503 ,endIndex = 541, valueRegex = None),
    # NOTE(review): named "ADD2" although the next column is "ADDR2"; possibly meant as address line 1 - confirm
    Column("PRIN2.ADD2. ", startIndex= 541 ,endIndex = 572, valueRegex = None),
    Column("PRIN2.ADDR2 ", startIndex= 572 ,endIndex = 603, valueRegex = None),
    Column("PRIN2.CITY2. ", startIndex= 603 ,endIndex = 629, valueRegex = None),
    Column("PRIN2.ST.2ZIP.2. ", startIndex= 629 ,endIndex = 653, valueRegex = None),
    Column("FED.ID/SS#2 ", startIndex= 653 ,endIndex = 679, valueRegex = None),
    Column("BILLING.NAME ", startIndex= 679 ,endIndex = 720, valueRegex = None),
    Column("UATB.AR.ADDRESS1.45 ", startIndex= 720 ,endIndex = 766, valueRegex = None),
    Column("UATB.AR.ADDRESS2.45 ", startIndex= 766 ,endIndex = 812, valueRegex = None),
    Column("UATB.AR.ADDRESS3.45 ", startIndex= 812 ,endIndex = 858, valueRegex = None),
    Column("AR.CITY. ", startIndex= 858 ,endIndex = 879, valueRegex = None),
    Column("AR.STATE ", startIndex= 879 ,endIndex = 888, valueRegex = None),
    Column("AR.ZIP ", startIndex= 888 ,endIndex = 899, valueRegex = None),
    Column("AR.ATTN. ", startIndex= 899 ,endIndex = 920, valueRegex = None),
    Column("UATB.CR.ATTG.NAME40. ", startIndex= 920 ,endIndex = 961, valueRegex = None),
    Column("CR.SCORING ", startIndex= 961 ,endIndex = 972, valueRegex = None),
    Column("FACILITY.SCORE ", startIndex= 972 ,endIndex = 988, valueRegex = None),
    # Last column: -1 lets it run to the end of each line
    Column("SIC.CODE ", startIndex= 988 ,endIndex = -1, valueRegex = None),
]
# Fixed-width layout of the DOB report: each Column covers characters [startIndex, endIndex) of a data
# line; endIndex = -1 means "to the end of the line" (see Column.extract_column).
DOB_COL: list[Column] = [
    Column("CONTRACT.NO ", startIndex= 0 ,endIndex = 16, valueRegex = None),
    Column("CUST.CREDIT.ACCT ", startIndex= 16 ,endIndex = 33, valueRegex = None),
    Column("CUST.ID. ", startIndex= 33 ,endIndex = 54, valueRegex = None),
    Column("GUAR.CODE.1 ", startIndex= 54 ,endIndex = 66, valueRegex = None),
    Column("PRIN/GUAR.NAME.1. ", startIndex= 66 ,endIndex = 102, valueRegex = None),
    Column("FED.ID/SS#1 ", startIndex= 102 ,endIndex = 128, valueRegex = None),
    Column("DOB1 ", startIndex= 128 ,endIndex = 139, valueRegex = None),
    Column("GUAR.CODE.2 ", startIndex= 139 ,endIndex = 151, valueRegex = None),
    Column("PRIN/GUAR.NAME.2. ", startIndex= 151 ,endIndex = 177, valueRegex = None),
    # Last column: -1 lets it run to the end of each line
    Column("FED.ID/SS#2 ", startIndex= 177 ,endIndex = -1, valueRegex = None)
]
def parse(ILOutput: str, columns: list[Column], dataColumnRegex: str = CONTRACT_NO_REGEX) -> DataFrame:
    """
    Build a DataFrame from the data lines of a fixed-width report.

    A line counts as data only when ``dataColumnRegex`` is found somewhere in it; every other line
    (headers, separators, totals) is skipped. Each data line contributes one value per column.

    :param ILOutput: Full text of the report.
    :param columns: Column definitions used to slice each data line.
    :param dataColumnRegex: Pattern that identifies a data line (a contract number by default).
    :return: One row per data line, one DataFrame column per Column.
    :raises ValueError: If the collected columns cannot form a DataFrame (e.g. unequal lengths).
    """
    debug(ILOutput)
    dataDict: dict[str, list] = {}
    for index, line in enumerate(ILOutput.splitlines()):
        debug(f"Index: {index} | {line}")
        match = re.search(dataColumnRegex, line)
        debug(match)
        if match is None:
            continue
        for col in columns:
            name, value = col.extract_column(line)
            debug(f"name: {name} | value: {value}")
            dataDict.setdefault(name, []).append(value)
    debug(dataDict)
    try:
        return DataFrame(dataDict)
    except ValueError as ve:
        # Log the per-column lengths to show which column is out of step, then let the caller handle
        # the real error instead of failing later on an unbound result variable.
        debug({name: len(values) for name, values in dataDict.items()})
        debug(ve)
        raise
# extracts = [("FIN", FIN_COLUMNS), ("ASSET", ASSET_COLS), ("CUST", CUST_COLS), ("DOB", DOB_COL)]
# for file, columns in extracts:
# with open(f"Inputs/{file}", errors="replace") as reportFile:
# report: str = reportFile.read()
# # Removes characters that cause errors
# report: str = report.replace("^"," ")
# dataframe: DataFrame = parse(ILOutput=report, columns=columns)
# print(f"{file} dataframe: {dataframe}")
# Matches one header token: a run of word characters, '.', '#' or '/' (no spaces), captured as "column_name".
COLUMN_NAME_REGEX = re.compile(r"(?P<column_name>(\w|\.|#|\/)+)", re.IGNORECASE)
def replace_bad_cols(line: str, cols: list[str]):
    """
    Replaces bad column names in a string with modified names that have spaces replaced with dots.

    Names are applied longest first, so the result does not depend on the order of ``cols``: a short
    name that is the beginning of a longer one (e.g. "PD RISK" / "PD RISK DATE") can no longer rewrite
    part of the longer header before that header has been matched as a whole.

    Args:
        line (str): The string containing the column names to modify.
        cols (list[str]): A list of column names to modify.

    Returns:
        str: The modified string with bad column names replaced. Its length equals that of ``line``
        because every space is replaced by exactly one dot.
    """
    for c in sorted(cols, key=len, reverse=True):
        # Names without a space would be replaced by themselves
        if ' ' not in c:
            continue
        # Replace the bad column name with its dotted form everywhere in the string
        line = line.replace(c, c.replace(' ', '.'))
    return line
def extract_data(input_doc: str, column_list: list[str]) -> DataFrame:
    """
    Extracts data from a string in a table-like format, where columns are identified by a list of column names, and
    returns the data as a Pandas DataFrame.

    The first line that starts with a word character is taken as the header line. Each header token
    (after spaces inside known column names have been replaced by dots) defines a column whose
    character span is the span of that token. Every later line of at least two characters is sliced
    with those spans until a "<n> records listed" line or the end of the input is reached.

    Args:
        input_doc (str): The string containing the table-like data to extract.
        column_list (list[str]): A list of column names to identify the columns in the table-like data.

    Returns:
        pandas.DataFrame: A DataFrame containing the extracted data from the input string. It is empty
        when no header line or no data lines were found.
    """
    columns: dict[str, dict[str, int]] = {}
    data: dict[str, list[str]] = {}
    for line in input_doc.splitlines():
        if not columns:
            # Still looking for the header: the first line that starts with a word character
            if re.search(r"^\w", line):
                # Dotting the names keeps the line length, so token positions still match the data lines
                header = replace_bad_cols(line, column_list)
                for c in COLUMN_NAME_REGEX.finditer(header):
                    name = c.group("column_name")
                    columns[name] = {"start": c.start(), "end": c.end()}
                    data[name] = []
            continue
        if len(line) < 2:
            continue
        # The footer line marks the end of the table
        if re.search(r"\d+ records listed", line):
            break
        # Extract the data from each column based on the start and end positions
        for key, span in columns.items():
            data[key].append(line[span["start"]:span["end"]].strip())
    # Also reached when the input has no footer line, so callers always receive a DataFrame
    return DataFrame(data)

@ -0,0 +1,50 @@
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build spec for the PortfolioParser executable.
# Analysis, PYZ, EXE and COLLECT are not imported here; they are expected to be supplied by PyInstaller
# when it executes this file.
block_cipher = None
# Entry script plus the data files to ship: config.json is copied to the top level ('.') of the bundle.
a = Analysis(
    ['IL Formatter.py'],
    pathex=[],
    binaries=[],
    datas=[('config.json', '.')],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
# Archive built from the pure-Python modules found by the analysis
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
# The executable itself; binaries are excluded here and gathered by COLLECT below.
# console=True keeps a console window attached to the application.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='PortfolioParser',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
# Gathers the executable, binaries, zip files and data files under the name 'PortfolioParser'
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='PortfolioParser',
)

@ -1 +1 @@
{"loggingLevel": "ERROR", "directories": {"ASSET": null, "CUST": null, "DOB": null, "FIN": null, "output": null}}
{"loggingLevel": "ERROR", "directories": {"ASSET": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "CUST": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "DOB": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "FIN": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "output": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs"}, "COLS": ["CUST ID", "CONTRACT NO", "BUSINESS TYPE", "FED ID", "CUST CREDIT ACCT", "CUSTOMER", "LEASE TYPE", "EQUIPMENT COST", "CBR", "NET INVESTMENT", "ANNUAL COMBINED IRR", "CONTRACT TERM", "INCOME START DATE", "FIRST PYMT DATE", "FIRST PYMT AMT", "CONTRACT PYMT", "INVOICE CODE", "INV DAYS", "INV DUE DAY", "SEC DEPOSIT", "IDC AMOUNTS", "IDC DATES", "RESIDUAL", "MANAGERS RESIDUAL", "PROMOTION", "PRODUCT LINE", "REGION", "REGION DESC", "BRANCH", "BUSINESS SEGMENT", "LEAD BANK", "MRKTNG REP", "MRKTNG REGION", "REMIT TO", "PYMT OPTION", "BANK CODE", "TAPE BANK NUM", "TAPE ACCOUNT NUM", "TAPE ACCT TYPE", "DEALER", "PRIVATE LABEL", "RESID METHOD", "LATE CHRG EXMPT", "INSURANCE CODE", "VARIABLE DATE", "VARIABLE RATE", "BILLING CYCLE", "UM USER DATE2", "CR ATTG PHONE", "GROSS CONTRACT", "ADV ", "PD AMT FINANCED", "PD INCOME START DATE", "INVOICE DESC", "VARIABLE PYMT CODE", "PD PAYMENT AMT", "QUOTE BUYOUT", "LATE CHARGE CODE", "LATE CHRG RATE", "M DEF COLLECTOR", "AM ACH LEAD DAYS", "UNL POOL", "PD RISK DATE", "PD RISK", "LGD RISK", "LGD DATE", "Service By Others", "CONTRACT NO", "CUST CREDIT ACCT", "CUST ID", "CUST NAME", "UATB CUST DBA", "UATB CUST ADDRESS1 45", "UATB CUST ADDRESS2 45", "UATB CUST ADDRESS3 45", "CUST CITY", "CUST STATE", "CUST ZIP", "GUAR CODE 1", "PRIN1/GUAR NAME 1", "PRIN1 ADD1", "PRIN1 ADD2", 
"PRIN1 CITY1", "PRIN1 ST 1", "ZIP 1", "FED ID/SS#1", "GUAR CODE 2 PRIN/GUAR NAME 2", "PRIN2 ADD2", "PRIN2 ADDR2", "PRIN2 CITY2", "PRIN2 ST 2ZIP 2", "FED ID/SS#2", "BILLING NAME", "UATB AR ADDRESS1 45", "UATB AR ADDRESS2 45", "UATB AR ADDRESS3 45", "AR CITY", "AR STATE", "AR ZIP", "AR ATTN", "UATB CR ATTG NAME40", "CR SCORING", "FACILITY SCORE", "SIC CODE", "ASSET #", "EQUIP DESC", "QUANTITY", "NEW USED", "MODEL", "A MANUFACTURER YEAR", "SERIAL NUMBER", "EQUIP CODE", "EQUIP CODE DESC", "ASSET VENDOR", "ASSET VENDOR NAME", "MANUFACTURER", "MANUFACT NAME", "UATB EQUIP ADDR1 45", "UATB EQUIP ADDR2 45", "EQUIP CITY", "EQUIP STATE", "EQUIP ZIP", "STATE TAX CODE", "CNTY TAX CODE", "CITY TAX CODE", "PROP STATUS", "EQUIP COST", "EQUIP COST PCT", "PUR OPTION", "PUR OPTION", "AS RECOURSE CODE", "RESID AMT", "BEG DEPR DATE", "OPER LS BEGIN DATE", "OPER LS LIM", "OPER LS SALVAGE", "PRIN/GUAR NAME 1", "DOB1", "GUAR CODE 2", "PRIN/GUAR NAME 2", "DOB2"]}

@ -1,4 +1,2 @@
[X] Working log level button
[X] Open file button
[X] Error Dialog
[X] Directory memory
[ ] Notification on completion
[ ] Icons
Loading…
Cancel
Save