From 5caaf3d7ace349948566e3381a1163b57447ebbd Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 1 May 2023 08:10:09 -0400 Subject: [PATCH] version 4.0 staging | New header relational positioning class structure --- .gitignore | 3 +- IL Extract.spec | 8 +- report_config_termplate.toml | 1 + requirements.txt | Bin 0 -> 606 bytes src/datasets/dataset_template.json | 12 ++ src/extractors.py | 184 +++++++++++++++++++++++++++++ src/il_extract.py | 2 +- src/ui_ile_main_window.py | 3 + 8 files changed, 207 insertions(+), 6 deletions(-) create mode 100644 report_config_termplate.toml create mode 100644 requirements.txt create mode 100644 src/datasets/dataset_template.json create mode 100644 src/extractors.py diff --git a/.gitignore b/.gitignore index aabeb30..2bbf53a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ __pycache__/ *.txt *.md -!todo.md \ No newline at end of file +!todo.md +!requirements.txt \ No newline at end of file diff --git a/IL Extract.spec b/IL Extract.spec index 7c5a649..8fd3250 100644 --- a/IL Extract.spec +++ b/IL Extract.spec @@ -5,10 +5,10 @@ block_cipher = None a = Analysis( - ['main.py'], - pathex=[], + ['src/il_extract.py'], + pathex=['src'], binaries=[], - datas=[('assets/extract.svg', '.'), ('assets/process.svg', '.'), ('assets/folder.svg', '.'), ('assets/copy.svg', '.'), ('settings.json', '.')], + datas=[('src/assets/*', 'assets'), ('settings.json', '.')], hiddenimports=[], hookspath=[], hooksconfig={}, @@ -37,7 +37,7 @@ exe = EXE( target_arch=None, codesign_identity=None, entitlements_file=None, - icon='assets\\extract.ico', + icon='src/assets/extract.ico', ) coll = COLLECT( exe, diff --git a/report_config_termplate.toml b/report_config_termplate.toml new file mode 100644 index 0000000..8995213 --- /dev/null +++ b/report_config_termplate.toml @@ -0,0 +1 @@ +name = "Test Name" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3dd71439ca0e413fb68a6b6fb26ee11110227534 GIT binary patch literal 606 zcmZuuTW*6u5c7AX9)&0@&7&W^Lwf|$(zHslARa2@@NMnc7hd>mu|bLhYlcTWpdcebj~lt4_+~APml|(FzE~a|85M>Jb9kCGN+Jb0!h0&n zCyV{g`YR^z_+V`ebwooFsjGYG?T9&35bqEb2B!1uZ>e&%oV97~%xS5%#!NVW$>&0a z1n>OvRH~_Yiiu`VWqRc-Z)EHi9&&oNG&7p4vcl9$Rzs)Cv{KHd8-yC>ALyifp4qge zmK*6hlkNb5{8P literal 0 HcmV?d00001 diff --git a/src/datasets/dataset_template.json b/src/datasets/dataset_template.json new file mode 100644 index 0000000..7a7774e --- /dev/null +++ b/src/datasets/dataset_template.json @@ -0,0 +1,12 @@ +{ + "name": { + "report": "", + "excel": "" + }, + "relative_position": { + "rows": 0, + "col": 0 + }, + "length": 0, + "data_type": "int" +} \ No newline at end of file diff --git a/src/extractors.py b/src/extractors.py new file mode 100644 index 0000000..ca7f1c6 --- /dev/null +++ b/src/extractors.py @@ -0,0 +1,184 @@ +from typing import TypeAlias, TypeVar +from dataclasses import dataclass +from pathlib import Path +import pathlib as pl +from abc import ABC, abstractmethod, abstractproperty +from re import search, match, compile, Match, Pattern +from enum import Enum + +ColumnIndex: TypeAlias = int +Money: TypeAlias = float + +Numeric = TypeVar("Numeric", float, int) + +class Line(Enum): + Header: str + Data: str + Erroneous: str + Top: str + Bottom: str + + + +@dataclass +class RelativePosition: + """ + Coordinates for navigating from one point in a row to another + """ + rows: int + col: ColumnIndex + +@dataclass +class DataValue: + + position: RelativePosition + length : int + regex: Pattern + dtype: type + + def correct_line(self, adj_lines_since_header: int) -> bool: + """ + """ + return adj_lines_since_header % self.position.rows == 0 + + def _line_slice(self, line: Line.Data) -> str|None: + """ + Attempts to get the data from the line. + Returns string in correct postion or None if out of range. + """ + try: + start: int = self.position.col + end: int = start + self.length + line_slice: str = line[start:end] + except IndexError: + #TODO: Add logging + line_slice = None + finally: + return line_slice + + @staticmethod + def _to_float(number_str: str) -> float|None: + try: + f_value:float = float(number_str.replace(',','')) + return f_value + except: + return None + + def extract(self, line: Line.Data) -> type|None: + """ + """ + line_slice: str|None = self._line_slice(line) + if isinstance(line_slice, None): + return None + + value_match: Match|None = search(self.regex, line_slice) + if isinstance(value_match, None): + return None + + value_str: str = value_match.group() + + value_str.strip() + if self.dtype == int or self.dtype == float: + return self._to_float(value_str) + #TODO datetime + return value_str + +class DataSet: + + def __init__(self, config: dict) -> None: + self.r_name = config["naming"]["report"] + try: + self.e_name = config["naming"]["excel"] + except KeyError: + self.e_name = self.r_name + + self.data_value: DataValue = DataValue( + position = RelativePosition( + rows= config["relative_position"]["rows"], + col= config["relative_position"]["col"] + ), + length = config["length"], + dtype = config["data_type"], + ) + + def line_position(self, line: str) -> ColumnIndex|None: + """ + Searches a line for the report header for this dataset. + + Returns: + - ColumnIndex(int) | None: The column index of the matches end position + or None if no match was found + """ + header_match: Match|None = search(self.r_name, line) + return header_match.end() if isinstance(header_match, Match) else None + +@dataclass +class ReportConfig: + + file_extension: str + name: str + datasets: list[DataSet] + data_line_regexes: list[Pattern] + + +class ILReport(ABC): + + def __init__(self, file_path: Path, report_config: ReportConfig) -> None: + self.in_file_path: Path = file_path + self.line_gen = self._line_generator(file_path) + + self.config: ReportConfig = report_config + self.name = report_config.name + + self.line_type_history: list[Line] = [] + self.last_header_line: int|None = None + + self.data_dict: dict = { + header.e_name: [] + for header in self.config.datasets + } + + @staticmethod + def _line_generator(file_path: Path): + with open(file_path, 'r') as in_file: + line: str + for line in in_file.readlines(): + yield line + + def _add_line_history(self, line: Line, max_history: int = 10): + self.line_type_history.append(line) + while len(self.line_type_history) > max_history: + self.line_type_history.pop(0) + + def _is_header_line(self, line: str) -> bool: + """ + Checks whether a report line has data headers. + """ + regex: Pattern + for regex in self.config.data_line_regexes: + if isinstance(search(regex,line), Match): + return True + return False + + @abstractmethod + def _skip_line(self, line) -> bool: + """ + Tells whether we should skip this line + """ + + @abstractmethod + def _process_line(self): + """ + + """ + + @abstractmethod + def _process_dataline(self, dataline: Line.Data): + """ + """ + +# Search the row for a data set name, or list of data set names +# extract all the data until the next row + +if __name__ == "__main__": + datasets = [] \ No newline at end of file diff --git a/src/il_extract.py b/src/il_extract.py index 2dae845..7d50914 100644 --- a/src/il_extract.py +++ b/src/il_extract.py @@ -14,7 +14,7 @@ with open("settings.json") as s: #if settings["debug"]: basicConfig(filename='debug.log', mode='w', encoding='utf-8', level=DEBUG) -debug("\n\n\n########################### VERSION = 3.10 ###########################\n\n\n") +debug("\n\n\n########################### VERSION = 3.2 ###########################\n\n\n") debug("Running main.py...") class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow): """ diff --git a/src/ui_ile_main_window.py b/src/ui_ile_main_window.py index 7e5df2c..2733def 100644 --- a/src/ui_ile_main_window.py +++ b/src/ui_ile_main_window.py @@ -1,3 +1,6 @@ +""" +The user interface set up for the main window of the application +""" # -*- coding: utf-8 -*- # Form implementation generated from reading ui file 'ILE_MainWindow.ui'