diff --git a/.gitignore b/.gitignore index 653b988..de2f9cb 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ venv/ Inputs/ Outputs/ __pycache__/ +build/ +dist/ *.csv *.log .vscode/ diff --git a/main.py b/IL Formatter.py similarity index 60% rename from main.py rename to IL Formatter.py index 6816555..d3d741c 100644 --- a/main.py +++ b/IL Formatter.py @@ -13,18 +13,28 @@ from time import sleep # Open the config file, create a dict, and set up logging with open("config.json") as configFile: - config: dict[Literal["loggingLevel"], Literal["ERROR", "WARNING", "INFO", "DEBUG"]] = load(configFile) + config: dict = load(configFile) basicConfig(filename='ILFormatter.log', encoding='utf-8', level=config["loggingLevel"], filemode='w', force=True) info(f"Starting with log level: {getLogger().level}") # Change the current log level and save the change to config.json def change_log_level(newLevel: Literal["ERROR", "WARNING", "INFO", "DEBUG"]): + """ + Changes the logging level of the logger and updates the configuration file. + + Args: + newLevel (Literal["ERROR", "WARNING", "INFO", "DEBUG"]): The new logging level to set. + """ + # Update the logging level in the configuration dictionary and save to file config["loggingLevel"] = newLevel with open("config.json", 'w') as configFile: dump(config, configFile) + # Set the logging level of the logger getLogger().setLevel(newLevel) + # Print a message to indicate the new logging level print(f"{now()} | New logging level: {getLogger().level}\n") + # Creates an error dialog pop up # Based on the ui from errorDialog.py def open_error_dialog(errorLabel: str, errorDescription: str, errorText: str): @@ -80,31 +90,55 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): self.debugAction.triggered.connect(lambda: self._switch_log_levels(self.debugAction)) def _check_files(self): + """ + Checks if all the required files have been loaded and the output location has been specified. + Enables or disables the open button and process button accordingly. + """ + # Print the paths of the loaded files for debugging debug(self.assetFile) debug(self.custFile) debug(self.dobFile) debug(self.finFile) + # Disable the open button self.openButton.setEnabled(False) + # Check if all required files and output location have been loaded ready = ( - self.assetFile != None and - self.custFile != None and - self.dobFile != None and - self.finFile != None and - self.outputLocation != None + self.assetFile is not None and + self.custFile is not None and + self.dobFile is not None and + self.finFile is not None and + self.outputLocation is not None ) + # Enable or disable the process button based on readiness self.processButton.setEnabled(ready) + - def _set_file(self, lineEdit: QtWidgets.QLineEdit, selfFile: Literal["ASSET", "CUST", "DOB", "FIN"]) -> Optional[str] : + def _set_file(self, lineEdit: QtWidgets.QLineEdit, selfFile: Literal["ASSET", "CUST", "DOB", "FIN"]) -> Optional[str]: + """ + Sets a file location based on user input using a file dialog. + + Args: + lineEdit (QtWidgets.QLineEdit): The line edit object that displays the selected file location. + selfFile (Literal["ASSET", "CUST", "DOB", "FIN"]): The file type being selected. + + Returns: + Optional[str]: The selected file location, or None if no file is selected. + """ + # Get the default location based on the file type defaultLocation = self._default_location(selfFile) + # Show the file dialog and get the selected file path selectedFile: list[str] = QtWidgets.QFileDialog.getOpenFileName(self, "OpenFile", directory=defaultLocation) debug(f"Selected file: {selectedFile}") + # Set the text of the line edit to the selected file path lineEdit.setText(selectedFile[0]) + # Save the selected file location for future reference file = selectedFile[0] if selectedFile[0] != '' else None if file!= None: self._default_location(selfFile, set=fileRoot(file)) + # If the output location has not been set yet, set it to the file root of the selected file if file != None and self.outputLocation == None: - # Output will may be from memory, or based on file root self._auto_output_set(fileRootStr=fileRoot(file)) + # Set the correct file variable based on the file type if selfFile == "ASSET": self.assetFile = file elif selfFile == "CUST": @@ -113,23 +147,49 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): self.dobFile = file elif selfFile == "FIN": self.finFile = file + # Check if all required files have been selected and enable the "process" button if so self._check_files() - + + def _set_output(self): + """ + Set the output file location using a file dialog and update the UI accordingly. + Also, update the default location for output in the settings. + + :return: None + """ + # Show a file dialog to get the save file name for the output self.outputLocation = QtWidgets.QFileDialog.getSaveFileName(self, "Output file name") if QtWidgets.QFileDialog.getSaveFileName(self, "Output file name") != '' else None + + # Update the output line edit in the UI with the selected file location or an empty string if no file was selected self.outputLE.setText(self.outputLocation if self.outputLocation != None else '') + + # Update the default output location in the settings, overwriting the existing value self._default_location("output", set=fileRoot(self.outputLocation), overwrite=True) + + # Log the selected output location debug(f"Output Location: {self.outputLocation}") + def _default_location(self, file: Literal["ASSET", "CUST", "DOB", "FIN", "output"], set: str = None, overwrite: bool = False) -> Optional[str]: - # Gets the default location for the a file - # Set if set provided and default location is not set - # Will always overwrite the default location if overwrite is True + """ + Get or set the default location for a specified file. + If 'set' is provided and the default location is not set, or 'overwrite' is True, the default location will be updated. + + :param file: The file type, one of ["ASSET", "CUST", "DOB", "FIN", "output"]. + :param set: The new default location, if setting or updating it. + :param overwrite: Whether to overwrite the existing default location. + :return: The default location for the specified file. + """ + # Get the default location for the specified file defaultLocation = config["directories"][file] debug(f"Default location: {defaultLocation}") + if set != None: debug(f"Setting default location to: {set} | ({(overwrite | (defaultLocation == None))})") + # Set or overwrite the default location if conditions are met config["directories"][file] = set if (overwrite | (defaultLocation == None)) else defaultLocation + # Save the updated configuration with open("config.json", 'w') as configFile: dump(config, configFile) return set @@ -137,96 +197,133 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): return defaultLocation def _auto_output_set(self, fileRootStr): - # Check the config for a saved output location + """ + Automatically set the output file location based on the saved configuration. + + :param fileRootStr: The root directory for the output file. + :return: None + """ + # Create the output file location string self.outputLocation = fileRootStr + f"/Portfolio Contracts - {dt.now().strftime('%Y-%m-%d')}.xlsx" + # Update the default output location in the settings, without overwriting self._default_location("output", set=fileRoot(self.outputLocation), overwrite=False) + # Update the output line edit in the UI with the auto-generated file location self.outputLE.setText(self.outputLocation if self.outputLocation != None else '') debug(f"Auto set output: {self.outputLocation}") def _open_with_default_app(self, item): """ - Opens the linked item with it's default application (excel) + Open the specified item with its default application (e.g. Excel). + + :param item: The item to be opened with the default application. + :return: None """ debug(f"_open_with_default_app: {item}") startfile(item) - def _switch_log_levels(self, newLevel: QtWidgets.QAction): + """ + Switch the log level for the application, updating the log level QAction objects accordingly. + + :param newLevel: The new log level QAction to be set. + :return: None + """ + # Store the current log level QAction oldLevel: QtWidgets.QAction = self.logLevel print(f"{now()} | Log Level Changed: {oldLevel.text()} -> {newLevel.text()}") + # Update the QAction objects' state newLevel.setChecked(True) oldLevel.setChecked(False) newLevel.setEnabled(False) oldLevel.setEnabled(True) + # Update the log level reference in the class self.logLevel = newLevel + # Change the log level in the logging system change_log_level(newLevel.text().upper()) - - def _parse_file(self, filePath: str, parseColumns: list[ILParser.Column]) -> Optional[DataFrame]: + def _parse_file(self, filePath: str) -> Optional[DataFrame]: + """ + Parse a file at the given file path and extract data into a DataFrame using the ILParser. + + :param filePath: The path to the file to be parsed. + :return: A DataFrame containing the extracted data, or None if parsing failed or the DataFrame is empty. + """ with open(filePath) as file: report = file.read() debug(f"Report: {report}") - debug(f"Parse Columns:\n{parseColumns}") + debug(f"Parse Columns:\n{config['COLS']}") + try: - data: DataFrame = ILParser.parse(report, parseColumns) + data: DataFrame = ILParser.extract_data(report, config["COLS"]) except Exception as e: logException(f"Failed to parse file-> {filePath} :\n{e}") - open_error_dialog("Parsing Error:",f"Failed to parse file-> {filePath}",repr(e)) + open_error_dialog("Parsing Error:", f"Failed to parse file-> {filePath}", repr(e)) return None + debug(f"Data: {data}") + if data.empty: error(f"Dataframe empty -> {filePath} | Returning none") - open_error_dialog("Data Processing Error:",f"Dataframe empty!",filePath) + open_error_dialog("Data Processing Error:", f"Dataframe empty!", filePath) return None - else: return data + else: + return data def _process(self): - assetDf: Optional[DataFrame] = self._parse_file(filePath= self.assetFile, parseColumns= ILParser.ASSET_COLS) + """ + Process the input files, parse their data, and write the results to an Excel file. + + :return: None + """ + assetDf: Optional[DataFrame] = self._parse_file(filePath=self.assetFile) debug(f"AssetDF: {assetDf} | {type(assetDf)} ") + if type(assetDf) != DataFrame: - debug(f"Parse Columns: {ILParser.ASSET_COLS}") self.assetLE.setText("") self.assetFile = None - return None - custDf: DataFrame = self._parse_file(self.custFile, ILParser.CUST_COLS) + + custDf: DataFrame = self._parse_file(self.custFile) debug(custDf) + if type(custDf) != DataFrame: - debug(f"Parse Columns: {ILParser.CUST_COLS}") self.custLe.setText("") self.custFile = None - return None - dobDf: DataFrame = self._parse_file(self.dobFile, ILParser.DOB_COL) + + dobDf: DataFrame = self._parse_file(self.dobFile) debug(dobDf) + if type(dobDf) != DataFrame: debug(f"Parse Columns: {ILParser.DOB_COL}") self.dobLE.setText("") self.dobFile = None - return None - finDf: DataFrame = self._parse_file(self.finFile, ILParser.FIN_COLUMNS) + + finDf: DataFrame = self._parse_file(self.finFile) debug(finDf) + if type(finDf) != DataFrame: - debug(f"Parse Columns: {ILParser.FIN_COLUMNS}") self.finLE.setText("") self.finFile = None - return None + try: with ExcelWriter(self.outputLocation) as writer: - assetDf.to_excel(writer, sheet_name="ASSET") - custDf.to_excel(writer, sheet_name="CUST") - dobDf.to_excel(writer, sheet_name="DOB") - finDf.to_excel(writer, sheet_name="FIN") + finDf.to_excel(writer, sheet_name="FIN", index=False) + custDf.to_excel(writer, sheet_name="CUST", index=False) + assetDf.to_excel(writer, sheet_name="ASSET", index=False) + dobDf.to_excel(writer, sheet_name="DOB", index=False) except Exception as e: logException(f"{now()} | Failed to write to excel -> {self.outputLocation} :\n{e}") open_error_dialog("Failed to Create Excel", f"Failed to write to excel -> {self.outputLocation}", repr(e)) return None + debug("Finished writing to excel.") self.openButton.setEnabled(True) + # Defines the app app = QtWidgets.QApplication(argv) # Sets the style diff --git a/ILParser.py b/ILParser.py index 0f186de..bd395f2 100644 --- a/ILParser.py +++ b/ILParser.py @@ -1,233 +1,59 @@ from pandas import DataFrame import re -from logging import debug, DEBUG, basicConfig, warn -from typing import Optional, Union -#logConfig = basicConfig(filename='ILFormatter.log', encoding='utf-8', level=DEBUG, filemode='w') -CONTRACT_NO_REGEX = "\d{3}-\d{7}-\d{3}" - -class Column: - def __init__(self, columnName: str, startIndex: int, - length: Optional[int] = None, endIndex: Optional[int] = None, valueRegex: Optional[str] = None) -> None: - - assert length != None or endIndex != None, "You must specify either the length or endIndex of this column" - self.name = columnName - self.start = startIndex - self.end = endIndex if endIndex != None else startIndex + length - self.valueRegex = valueRegex - - def __regex_check(self, value: str) -> bool: - if self.valueRegex == None: return True - return False if re.search(self.valueRegex, value) == None else True - - def extract_column(self, line: str) -> tuple[str, Union[str, float]]: - debug(line) - end = self.end if self.end != -1 else len(line) - try: - dataValue: str = line[self.start : end].replace(',', '').strip() - except: - warn(f"NO DATA VALUE PRESENT ({self.name} | {self.start}-{self.end}): {line}") - if not self.__regex_check(dataValue): - warn(f"Invalid column value: Column: {self.name} value: {dataValue} regex: {self.valueRegex}") - try: - dataValue = float(dataValue) - except: pass - return self.name, dataValue - - -FIN_COLUMNS: list[Column] = [ - Column("CUST.ID", startIndex= 0 ,endIndex = 21, valueRegex = "\d{8}"), - Column("CONTRACT.NO", startIndex= 21 ,endIndex = 37, valueRegex = "CONTRACT_NO_REGEX"), - Column("BUSINESS.TYPE", startIndex= 37 ,endIndex = 51, valueRegex = "\d{2}"), - Column("FED.ID", startIndex= 51 ,endIndex = 72, valueRegex = "\d{9}"), - Column("CUST.CREDIT.ACCT", startIndex= 72 ,endIndex = 89, valueRegex = "\d+"), - Column("CUSTOMER", startIndex= 89 ,endIndex = 120, valueRegex = None), - Column("LEASE.TYPE", startIndex= 120 ,endIndex = 131, valueRegex = None), - Column("EQUIPMENT.COST", startIndex= 131 ,endIndex = 146, valueRegex = None), - Column("CBR.", startIndex= 146 ,endIndex = 161, valueRegex = None), - Column("NET.INVESTMENT", startIndex= 161 ,endIndex = 176, valueRegex = None), - Column("ANNUAL.COMBINED.IRR", startIndex= 176 ,endIndex = 185, valueRegex = None), - Column("CONTRACT.TERM", startIndex= 185 ,endIndex = 199, valueRegex = None), - Column("INCOME.START.DATE", startIndex= 199 ,endIndex = 217, valueRegex = None), - Column("FIRST.PYMT.DATE", startIndex= 217 ,endIndex = 233, valueRegex = None), - Column("FIRST.PYMT.AMT", startIndex= 233 ,endIndex = 248, valueRegex = None), - Column("CONTRACT.PYMT.", startIndex= 248 ,endIndex = 263, valueRegex = None), - Column("INVOICE.CODE", startIndex= 263 ,endIndex = 276, valueRegex = None), - Column("INV.DAYS", startIndex= 276 ,endIndex = 285, valueRegex = None), - Column("INV.DUE.DAY", startIndex= 285 ,endIndex = 297, valueRegex = None), - Column("SEC.DEPOSIT.", startIndex= 297 ,endIndex = 312, valueRegex = None), - Column("IDC.AMOUNTS.", startIndex= 312 ,endIndex = 327, valueRegex = None), - Column("IDC.DATES.", startIndex= 327 ,endIndex = 338, valueRegex = None), - Column("RESIDUAL", startIndex= 338 ,endIndex = 353, valueRegex = None), - Column("MANAGERS.RESIDUAL", startIndex= 353 ,endIndex = 371, valueRegex = None), - Column("PROMOTION", startIndex= 371 ,endIndex = 381, valueRegex = None), - Column("PRODUCT.LINE", startIndex= 381 ,endIndex = 394, valueRegex = None), - Column("REGION", startIndex= 394 ,endIndex = 401, valueRegex = None), - Column("REGION.DESC.", startIndex= 401 ,endIndex = 432, valueRegex = None), - Column("BRANCH", startIndex= 432 ,endIndex = 439, valueRegex = None), - Column("BUSINESS.SEGMENT", startIndex= 439 ,endIndex = 456, valueRegex = None), - Column("LEAD.BANK", startIndex= 456 ,endIndex = 466, valueRegex = None), - Column("MRKTNG.REP", startIndex= 466 ,endIndex = 477, valueRegex = None), - Column("MRKTNG.REGION", startIndex= 477 ,endIndex = 491, valueRegex = None), - Column("REMIT.TO", startIndex= 491 ,endIndex = 500, valueRegex = None), - Column("PYMT.OPTION", startIndex= 500 ,endIndex = 512, valueRegex = None), - Column("BANK.CODE", startIndex= 512 ,endIndex = 522, valueRegex = None), - Column("TAPE.BANK.NUM", startIndex= 522 ,endIndex = 536, valueRegex = None), - Column("TAPE.ACCOUNT.NUM", startIndex= 536 ,endIndex = 557, valueRegex = None), - Column("TAPE.ACCT.TYPE", startIndex= 557 ,endIndex = 572, valueRegex = None), - Column("DEALER", startIndex= 572 ,endIndex = 583, valueRegex = None), - Column("PRIVATE.LABEL", startIndex= 583 ,endIndex = 597, valueRegex = None), - Column("RESID.METHOD", startIndex= 597 ,endIndex = 610, valueRegex = None), - Column("LATE.CHRG.EXMPT", startIndex= 610 ,endIndex = 626, valueRegex = None), - Column("INSURANCE.CODE", startIndex= 626 ,endIndex = 641, valueRegex = None), - Column("VARIABLE.DATE", startIndex= 641 ,endIndex = 655, valueRegex = None), - Column("VARIABLE.RATE", startIndex= 655 ,endIndex = 671, valueRegex = None), - Column("BILLING.CYCLE", startIndex= 671 ,endIndex = 685, valueRegex = None), - Column("UM.USER.DATE2", startIndex= 685 ,endIndex = 699, valueRegex = None), - Column("CR.ATTG.PHONE", startIndex= 699 ,endIndex = 715, valueRegex = None), - Column("GROSS.CONTRACT", startIndex= 715 ,endIndex = 730, valueRegex = None), - Column("ADV", startIndex= 730 ,endIndex = 734, valueRegex = None), - Column("PD.AMT.FINANCED ", startIndex= 735 ,endIndex = 751, valueRegex = None), - Column("PD.INCOME.START.DATE ", startIndex= 751 ,endIndex = 772, valueRegex = None), - Column("INVOICE.DESC", startIndex= 772 ,endIndex = 792, valueRegex = None), - Column("VARIABLE.PYMT.CODE ", startIndex= 792 ,endIndex = 811, valueRegex = None), - Column("PD.PAYMENT.AMT ", startIndex= 811 ,endIndex = 826, valueRegex = None), - Column("QUOTE.BUYOUT ", startIndex= 826 ,endIndex = 839, valueRegex = None), - Column("LATE.CHARGE.CODE ", startIndex= 839 ,endIndex = 856, valueRegex = None), - Column("LATE.CHRG.RATE ", startIndex= 856 ,endIndex = 871, valueRegex = None), - Column("M.DEF.COLLECTOR ", startIndex= 871 ,endIndex = 887, valueRegex = None), - Column("AM.ACH.LEAD.DAYS ", startIndex= 887 ,endIndex = 904, valueRegex = None), - Column("UNL POOL", startIndex= 904 ,endIndex = 915, valueRegex = None), - Column("PD RISK", startIndex= 915 ,endIndex = 926, valueRegex = None), - Column("PD RISK DATE.", startIndex= 926 ,endIndex = 940, valueRegex = None), - Column("LGD RISK", startIndex= 940 ,endIndex = 949, valueRegex = None), - Column("LGD DATE", startIndex= 949 ,endIndex = 960, valueRegex = None), - Column("Service By Others", startIndex= 960 ,endIndex = -1, valueRegex = None) -] - -ASSET_COLS: list[Column] = [ - Column("ASSET.#. ", startIndex= 0 ,endIndex = 9, valueRegex = None), - Column("CUST.ID. ", startIndex= 9 ,endIndex = 30, valueRegex = None), - Column("CONTRACT.NO ", startIndex= 30 ,endIndex = 46, valueRegex = None), - Column("CUST.CREDIT.ACCT ", startIndex= 46 ,endIndex = 63, valueRegex = None), - Column("CUST.NAME. ", startIndex= 63 ,endIndex = 84, valueRegex = None), - Column("EQUIP.DESC ", startIndex= 84 ,endIndex = 125, valueRegex = None), - Column("QUANTITY ", startIndex= 125 ,endIndex = 134, valueRegex = None), - Column("NEW.USED ", startIndex= 134 ,endIndex = 143, valueRegex = None), - Column("MODEL. ", startIndex= 143 ,endIndex = 164, valueRegex = None), - Column("A.MANUFACTURER.YEAR ", startIndex= 164 ,endIndex = 184, valueRegex = None), - Column("SERIAL.NUMBER. ", startIndex= 184 ,endIndex = 205, valueRegex = None), - Column("EQUIP.CODE ", startIndex= 205 ,endIndex = 216, valueRegex = None), - Column("EQUIP.CODE.DESC. ", startIndex= 216 ,endIndex = 247, valueRegex = None), - Column("ASSET.VENDOR ", startIndex= 247 ,endIndex = 260, valueRegex = None), - Column("ASSET.VENDOR.NAME. ", startIndex= 260 ,endIndex = 291, valueRegex = None), - Column("MANUFACTURER ", startIndex= 291 ,endIndex = 304, valueRegex = None), - Column("MANUFACT.NAME. ", startIndex= 304 ,endIndex = 335, valueRegex = None), - Column("UATB.EQUIP.ADDR1.45 ", startIndex= 335 ,endIndex = 381, valueRegex = None), - Column("UATB.EQUIP.ADDR2.45 ", startIndex= 381 ,endIndex = 427, valueRegex = None), - Column("EQUIP.CITY. ", startIndex= 427 ,endIndex = 453, valueRegex = None), - Column("EQUIP.STATE ", startIndex= 453 ,endIndex = 465, valueRegex = None), - Column("EQUIP.ZIP. ", startIndex= 465 ,endIndex = 476, valueRegex = None), - Column("STATE.TAX.CODE ", startIndex= 476 ,endIndex = 491, valueRegex = None), - Column("CNTY.TAX.CODE ", startIndex= 491 ,endIndex = 505, valueRegex = None), - Column("CITY.TAX.CODE ", startIndex= 505 ,endIndex = 519, valueRegex = None), - Column("PROP.STATUS ", startIndex= 519 ,endIndex = 531, valueRegex = None), - Column("EQUIP.COST ", startIndex= 531 ,endIndex = 546, valueRegex = None), - Column("EQUIP.COST.PCT ", startIndex= 546 ,endIndex = 561, valueRegex = None), - Column("PUR.OPTION ", startIndex= 561 ,endIndex = 572, valueRegex = None), - Column("PUR.OPTION. ", startIndex= 572 ,endIndex = 588, valueRegex = None), - Column("AS.RECOURSE.CODE ", startIndex= 588 ,endIndex = 605, valueRegex = None), - Column("RESID.AMT. ", startIndex= 605 ,endIndex = 620, valueRegex = None), - Column("BEG.DEPR.DATE ", startIndex= 620 ,endIndex = 634, valueRegex = None), - Column("OPER.LS.BEGIN.DATE ", startIndex= 634 ,endIndex = 653, valueRegex = None), - Column("OPER.LS.LIM ", startIndex= 653 ,endIndex = 665, valueRegex = None), - Column("OPER.LS.SALVAGE ", startIndex= 665 ,endIndex = -1, valueRegex = None) -] - -CUST_COLS: list[Column] = [ - Column("CONTRACT.NO ", startIndex= 0 ,endIndex = 16, valueRegex = None), - Column("CUST.CREDIT.ACCT ", startIndex= 16 ,endIndex = 33, valueRegex = None), - Column("CUST.ID. ", startIndex= 33 ,endIndex = 54, valueRegex = None), - Column("CUST.NAME. ", startIndex= 54 ,endIndex = 105, valueRegex = None), - Column("UATB.CUST.DBA. ", startIndex= 105 ,endIndex = 136, valueRegex = None), - Column("UATB.CUST.ADDRESS1.45 ", startIndex= 136 ,endIndex = 182, valueRegex = None), - Column("UATB.CUST.ADDRESS2.45 ", startIndex= 182 ,endIndex = 228, valueRegex = None), - Column("UATB.CUST.ADDRESS3.45 ", startIndex= 228 ,endIndex = 274, valueRegex = None), - Column("CUST.CITY. ", startIndex= 274 ,endIndex = 295, valueRegex = None), - Column("CUST.STATE ", startIndex= 295 ,endIndex = 306, valueRegex = None), - Column("CUST.ZIP ", startIndex= 306 ,endIndex = 317, valueRegex = None), - Column("GUAR.CODE.1 ", startIndex= 317 ,endIndex = 329, valueRegex = None), - Column("PRIN1/GUAR.NAME.1. ", startIndex= 329 ,endIndex = 365, valueRegex = None), - Column("PRIN1.ADD1. ", startIndex= 365 ,endIndex = 396, valueRegex = None), - Column("PRIN1.ADD2. ", startIndex= 396 ,endIndex = 427, valueRegex = None), - Column("PRIN1.CITY1. ", startIndex= 427 ,endIndex = 453, valueRegex = None), - Column("PRIN1.ST.1. ", startIndex= 453 ,endIndex = 464, valueRegex = None), - Column("ZIP.1. ", startIndex= 464 ,endIndex = 477, valueRegex = None), - Column("FED.ID/SS#1 ", startIndex= 477 ,endIndex = 503, valueRegex = None), - Column("GUAR.CODE.2.PRIN/GUAR.NAME.2. ", startIndex= 503 ,endIndex = 541, valueRegex = None), - Column("PRIN2.ADD2. ", startIndex= 541 ,endIndex = 572, valueRegex = None), - Column("PRIN2.ADDR2 ", startIndex= 572 ,endIndex = 603, valueRegex = None), - Column("PRIN2.CITY2. ", startIndex= 603 ,endIndex = 629, valueRegex = None), - Column("PRIN2.ST.2ZIP.2. ", startIndex= 629 ,endIndex = 653, valueRegex = None), - Column("FED.ID/SS#2 ", startIndex= 653 ,endIndex = 679, valueRegex = None), - Column("BILLING.NAME ", startIndex= 679 ,endIndex = 720, valueRegex = None), - Column("UATB.AR.ADDRESS1.45 ", startIndex= 720 ,endIndex = 766, valueRegex = None), - Column("UATB.AR.ADDRESS2.45 ", startIndex= 766 ,endIndex = 812, valueRegex = None), - Column("UATB.AR.ADDRESS3.45 ", startIndex= 812 ,endIndex = 858, valueRegex = None), - Column("AR.CITY. ", startIndex= 858 ,endIndex = 879, valueRegex = None), - Column("AR.STATE ", startIndex= 879 ,endIndex = 888, valueRegex = None), - Column("AR.ZIP ", startIndex= 888 ,endIndex = 899, valueRegex = None), - Column("AR.ATTN. ", startIndex= 899 ,endIndex = 920, valueRegex = None), - Column("UATB.CR.ATTG.NAME40. ", startIndex= 920 ,endIndex = 961, valueRegex = None), - Column("CR.SCORING ", startIndex= 961 ,endIndex = 972, valueRegex = None), - Column("FACILITY.SCORE ", startIndex= 972 ,endIndex = 988, valueRegex = None), - Column("SIC.CODE ", startIndex= 988 ,endIndex = -1, valueRegex = None), -] - -DOB_COL: list[Column] = [ - Column("CONTRACT.NO ", startIndex= 0 ,endIndex = 16, valueRegex = None), - Column("CUST.CREDIT.ACCT ", startIndex= 16 ,endIndex = 33, valueRegex = None), - Column("CUST.ID. ", startIndex= 33 ,endIndex = 54, valueRegex = None), - Column("GUAR.CODE.1 ", startIndex= 54 ,endIndex = 66, valueRegex = None), - Column("PRIN/GUAR.NAME.1. ", startIndex= 66 ,endIndex = 102, valueRegex = None), - Column("FED.ID/SS#1 ", startIndex= 102 ,endIndex = 128, valueRegex = None), - Column("DOB1 ", startIndex= 128 ,endIndex = 139, valueRegex = None), - Column("GUAR.CODE.2 ", startIndex= 139 ,endIndex = 151, valueRegex = None), - Column("PRIN/GUAR.NAME.2. ", startIndex= 151 ,endIndex = 177, valueRegex = None), - Column("FED.ID/SS#2 ", startIndex= 177 ,endIndex = -1, valueRegex = None) -] - -def parse(ILOutput: str, columns: list[Column], dataColumnRegex: str = CONTRACT_NO_REGEX) -> DataFrame : - debug(ILOutput) - lines = ILOutput.splitlines() - dataDict = {} - for index, line in enumerate(lines): - debug(f"Index: {index} | {line}") - debug(re.search(dataColumnRegex, line)) - if re.search(dataColumnRegex, line) == None: continue - for col in columns: - name, value = col.extract_column(line) - debug(f"name: {name} | value: {value}") - try: - dataDict[name].append(value) - except: - dataDict[name] = [value] - debug(dataDict) - try: - dataframe = DataFrame(dataDict) - except ValueError as ve: - debug({c: len(dataDict[c]) for c in dataDict.keys()}) - debug(ve) - return dataframe - - -# extracts = [("FIN", FIN_COLUMNS), ("ASSET", ASSET_COLS), ("CUST", CUST_COLS), ("DOB", DOB_COL)] - -# for file, columns in extracts: -# with open(f"Inputs/{file}", errors="replace") as reportFile: -# report: str = reportFile.read() -# # Removes characters that cause errors -# report: str = report.replace("^"," ") -# dataframe: DataFrame = parse(ILOutput=report, columns=columns) -# print(f"{file} dataframe: {dataframe}") \ No newline at end of file +COLUMN_NAME_REGEX = re.compile(r"(?P(\w|\.|#|\/)+)", re.IGNORECASE) + +def replace_bad_cols(line: str, cols: list[str]): + """ + Replaces bad column names in a string with modified names that have spaces replaced with dots. + + Args: + line (str): The string containing the column names to modify. + cols (list[str]): A list of column names to modify. + + Returns: + str: The modified string with bad column names replaced. + """ + for c in cols: + # Replace spaces with dots in the column name + gc = c.replace(' ', '.') + # Replace the bad column name with the modified column name in the string + line = line.replace(c, gc) + return line + + +def extract_data(input_doc: str, column_list: list[str]): + """ + Extracts data from a string in a table-like format, where columns are identified by a list of column names, and + returns the data as a Pandas DataFrame. + + Args: + input_doc (str): The string containing the table-like data to extract. + column_list (list[str]): A list of column names to identify the columns in the table-like data. + + Returns: + pandas.DataFrame: A DataFrame containing the extracted data from the input string. + """ + line: str + columns = {} + data = {} + for line in input_doc.splitlines(): + if len(columns) == 0 : + # Find the line that contains the column names and replace bad column names + if re.search("^\w", line): + line = replace_bad_cols(line, column_list) + # Find the start and end positions of each column name and store them in a dictionary + columns_names = re.finditer(COLUMN_NAME_REGEX, line) + for c in columns_names: + columns[c.group("column_name")] = {"start": c.start(), "end": c.end()} + data[c.group("column_name")] = [] + continue + elif len(line) < 2: + continue + # Check if we've reached the end of the table and return the data + if re.search("\d+ records listed", line): + return DataFrame(data) + # Extract the data from each column based on the start and end positions + for key, span in columns.items(): + data[key].append(line[span["start"]:span["end"]].strip()) \ No newline at end of file diff --git a/PortfolioParser.spec b/PortfolioParser.spec new file mode 100644 index 0000000..caff0a4 --- /dev/null +++ b/PortfolioParser.spec @@ -0,0 +1,50 @@ +# -*- mode: python ; coding: utf-8 -*- + + +block_cipher = None + + +a = Analysis( + ['IL Formatter.py'], + pathex=[], + binaries=[], + datas=[('config.json', '.')], + hiddenimports=[], + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False, +) +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +exe = EXE( + pyz, + a.scripts, + [], + exclude_binaries=True, + name='PortfolioParser', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + console=True, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) +coll = COLLECT( + exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=True, + upx_exclude=[], + name='PortfolioParser', +) diff --git a/config.json b/config.json index 2e877ac..42ab9ab 100644 --- a/config.json +++ b/config.json @@ -1 +1 @@ -{"loggingLevel": "ERROR", "directories": {"ASSET": null, "CUST": null, "DOB": null, "FIN": null, "output": null}} \ No newline at end of file +{"loggingLevel": "ERROR", "directories": {"ASSET": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "CUST": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "DOB": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "FIN": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs", "output": "C:/Users/glott/OneDrive - LEAF Commercial Capital/Documents/0 In Progess/Portfolio/Automation/IL Formatter/Inputs"}, "COLS": ["CUST ID", "CONTRACT NO", "BUSINESS TYPE", "FED ID", "CUST CREDIT ACCT", "CUSTOMER", "LEASE TYPE", "EQUIPMENT COST", "CBR", "NET INVESTMENT", "ANNUAL COMBINED IRR", "CONTRACT TERM", "INCOME START DATE", "FIRST PYMT DATE", "FIRST PYMT AMT", "CONTRACT PYMT", "INVOICE CODE", "INV DAYS", "INV DUE DAY", "SEC DEPOSIT", "IDC AMOUNTS", "IDC DATES", "RESIDUAL", "MANAGERS RESIDUAL", "PROMOTION", "PRODUCT LINE", "REGION", "REGION DESC", "BRANCH", "BUSINESS SEGMENT", "LEAD BANK", "MRKTNG REP", "MRKTNG REGION", "REMIT TO", "PYMT OPTION", "BANK CODE", "TAPE BANK NUM", "TAPE ACCOUNT NUM", "TAPE ACCT TYPE", "DEALER", "PRIVATE LABEL", "RESID METHOD", "LATE CHRG EXMPT", "INSURANCE CODE", "VARIABLE DATE", "VARIABLE RATE", "BILLING CYCLE", "UM USER DATE2", "CR ATTG PHONE", "GROSS CONTRACT", "ADV ", "PD AMT FINANCED", "PD INCOME START DATE", "INVOICE DESC", "VARIABLE PYMT CODE", "PD PAYMENT AMT", "QUOTE BUYOUT", "LATE CHARGE CODE", "LATE CHRG RATE", "M DEF COLLECTOR", "AM ACH LEAD DAYS", "UNL POOL", "PD RISK DATE", "PD RISK", "LGD RISK", "LGD DATE", "Service By Others", "CONTRACT NO", "CUST CREDIT ACCT", "CUST ID", "CUST NAME", "UATB CUST DBA", "UATB CUST ADDRESS1 45", "UATB CUST ADDRESS2 45", "UATB CUST ADDRESS3 45", "CUST CITY", "CUST STATE", "CUST ZIP", "GUAR CODE 1", "PRIN1/GUAR NAME 1", "PRIN1 ADD1", "PRIN1 ADD2", "PRIN1 CITY1", "PRIN1 ST 1", "ZIP 1", "FED ID/SS#1", "GUAR CODE 2 PRIN/GUAR NAME 2", "PRIN2 ADD2", "PRIN2 ADDR2", "PRIN2 CITY2", "PRIN2 ST 2ZIP 2", "FED ID/SS#2", "BILLING NAME", "UATB AR ADDRESS1 45", "UATB AR ADDRESS2 45", "UATB AR ADDRESS3 45", "AR CITY", "AR STATE", "AR ZIP", "AR ATTN", "UATB CR ATTG NAME40", "CR SCORING", "FACILITY SCORE", "SIC CODE", "ASSET #", "EQUIP DESC", "QUANTITY", "NEW USED", "MODEL", "A MANUFACTURER YEAR", "SERIAL NUMBER", "EQUIP CODE", "EQUIP CODE DESC", "ASSET VENDOR", "ASSET VENDOR NAME", "MANUFACTURER", "MANUFACT NAME", "UATB EQUIP ADDR1 45", "UATB EQUIP ADDR2 45", "EQUIP CITY", "EQUIP STATE", "EQUIP ZIP", "STATE TAX CODE", "CNTY TAX CODE", "CITY TAX CODE", "PROP STATUS", "EQUIP COST", "EQUIP COST PCT", "PUR OPTION", "PUR OPTION", "AS RECOURSE CODE", "RESID AMT", "BEG DEPR DATE", "OPER LS BEGIN DATE", "OPER LS LIM", "OPER LS SALVAGE", "PRIN/GUAR NAME 1", "DOB1", "GUAR CODE 2", "PRIN/GUAR NAME 2", "DOB2"]} \ No newline at end of file diff --git a/todo.txt b/todo.txt index 5579448..8044c60 100644 --- a/todo.txt +++ b/todo.txt @@ -1,4 +1,2 @@ -[X] Working log level button -[X] Open file button -[X] Error Dialog -[X] Directory memory \ No newline at end of file +[ ] Notification on completion +[ ] Icons \ No newline at end of file