From 5caaf3d7ace349948566e3381a1163b57447ebbd Mon Sep 17 00:00:00 2001
From: = <=>
Date: Mon, 1 May 2023 08:10:09 -0400
Subject: [PATCH] version 4.0 staging | New header relational positioning class
 structure

---
 .gitignore                         |   3 +-
 IL Extract.spec                    |   8 +-
 report_config_termplate.toml       |   1 +
 requirements.txt                   | Bin 0 -> 606 bytes
 src/datasets/dataset_template.json |  12 ++
 src/extractors.py                  | 184 +++++++++++++++++++++++++++++
 src/il_extract.py                  |   2 +-
 src/ui_ile_main_window.py          |   3 +
 8 files changed, 207 insertions(+), 6 deletions(-)
 create mode 100644 report_config_termplate.toml
 create mode 100644 requirements.txt
 create mode 100644 src/datasets/dataset_template.json
 create mode 100644 src/extractors.py

diff --git a/.gitignore b/.gitignore
index aabeb30..2bbf53a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ __pycache__/
 *.txt
 *.md
 
-!todo.md
\ No newline at end of file
+!todo.md
+!requirements.txt
\ No newline at end of file
diff --git a/IL Extract.spec b/IL Extract.spec
index 7c5a649..8fd3250 100644
--- a/IL Extract.spec	
+++ b/IL Extract.spec	
@@ -5,10 +5,10 @@ block_cipher = None
 
 
 a = Analysis(
-    ['main.py'],
-    pathex=[],
+    ['src/il_extract.py'],
+    pathex=['src'],
     binaries=[],
-    datas=[('assets/extract.svg', '.'), ('assets/process.svg', '.'), ('assets/folder.svg', '.'), ('assets/copy.svg', '.'), ('settings.json', '.')],
+    datas=[('src/assets/*', 'assets'), ('settings.json', '.')],
     hiddenimports=[],
     hookspath=[],
     hooksconfig={},
@@ -37,7 +37,7 @@ exe = EXE(
     target_arch=None,
     codesign_identity=None,
     entitlements_file=None,
-    icon='assets\\extract.ico',
+    icon='src/assets/extract.ico',
 )
 coll = COLLECT(
     exe,
diff --git a/report_config_termplate.toml b/report_config_termplate.toml
new file mode 100644
index 0000000..8995213
--- /dev/null
+++ b/report_config_termplate.toml
@@ -0,0 +1 @@
+name = "Test Name"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3dd71439ca0e413fb68a6b6fb26ee11110227534
GIT binary patch
literal 606
zcmZuuTW*6u5c7AX9)&0@&7&W^Lwf|$(zHslARa2@@NMnc<?(?KBZNJ+XXfuW#}zHk
zsF9<>7hd>mu|bLhYlcTWpdcebj~lt4_+~APml|(FzE~a|85M>Jb9kCGN+Jb0!h0&n
zCyV{g`YR^z_+V`ebwooFsjGYG?T9&35bqEb2B!1uZ>e&%oV97~%xS5%#!NVW$>&0a
z1n>OvRH~_Yiiu`VWqRc-Z)EHi9&&oNG&7p4vcl9$Rzs)Cv{KHd8-yC>ALyifp4qge
zmK*6<bq{S*+#}y{o8sRiGUg8V)D3=YQyJNswB;XdIBZ^fl3KW{QuQ&FGVLcs`XNca
G>hlkNb5{8P

literal 0
HcmV?d00001

diff --git a/src/datasets/dataset_template.json b/src/datasets/dataset_template.json
new file mode 100644
index 0000000..7a7774e
--- /dev/null
+++ b/src/datasets/dataset_template.json
@@ -0,0 +1,12 @@
+{
+    "name": {
+        "report": "",
+        "excel": ""
+    },
+    "relative_position": {
+        "rows": 0,
+        "col": 0
+    },
+    "length": 0,
+    "data_type": "int"
+}
\ No newline at end of file
diff --git a/src/extractors.py b/src/extractors.py
new file mode 100644
index 0000000..ca7f1c6
--- /dev/null
+++ b/src/extractors.py
@@ -0,0 +1,184 @@
+from typing import TypeAlias, TypeVar
+from dataclasses import dataclass
+from pathlib import Path
+import pathlib as pl
+from abc import ABC, abstractmethod, abstractproperty
+from re import search, match, compile, Match, Pattern
+from enum import Enum
+
+ColumnIndex: TypeAlias = int
+Money: TypeAlias = float
+
+Numeric = TypeVar("Numeric", float, int)
+
+class Line(Enum):
+    """Kinds of report line; members need values (bare annotations define none)."""
+    Header = "header"
+    Data = "data"
+    Erroneous = "erroneous"
+    Top = "top"
+    Bottom = "bottom"
+
+
+@dataclass
+class RelativePosition:
+    """
+    Coordinates for navigating from one point in a row to another.
+    """
+    rows: int  # row component of the offset
+    col: ColumnIndex  # column component (ColumnIndex is an alias of int)
+
+@dataclass
+class DataValue:
+    """Locates one value in a fixed-width report line and converts it."""
+
+    position: RelativePosition  # rows/col offset of the value
+    length: int  # width of the value's slice, in characters
+    regex: Pattern  # pattern searched for inside the slice
+    dtype: type  # int and float are parsed as numbers, anything else stays text
+
+    def correct_line(self, adj_lines_since_header: int) -> bool:
+        """
+        Return True when the count is a multiple of ``position.rows``.
+        Raises ZeroDivisionError when ``position.rows`` is 0.
+        """
+        return adj_lines_since_header % self.position.rows == 0
+
+    def _line_slice(self, line: str) -> str|None:
+        """
+        Return the ``length`` characters of ``line`` from ``position.col`` on.
+        Returns None when the line is too short to reach that column.
+        """
+        start: int = self.position.col
+        if start >= len(line):
+            # Slicing never raises IndexError, so test the range explicitly.
+            #TODO: Add logging
+            return None
+        return line[start:start + self.length]
+
+    @staticmethod
+    def _to_float(number_str: str) -> float|None:
+        """Parse a number, ignoring thousands separators; None if invalid."""
+        try:
+            return float(number_str.replace(',', ''))
+        except (AttributeError, TypeError, ValueError):
+            return None
+
+    def extract(self, line: str) -> str|float|None:
+        """
+        Extract this value from ``line``.
+        Returns a float for numeric dtypes and the stripped text otherwise;
+        None when the slice is out of range, unmatched or not a number.
+        """
+        line_slice: str|None = self._line_slice(line)
+        if line_slice is None:
+            return None
+        value_match: Match|None = search(self.regex, line_slice)
+        if value_match is None:
+            return None
+        # str.strip() returns a new string, so the result has to be kept.
+        value_str: str = value_match.group().strip()
+        if self.dtype in (int, float):
+            return self._to_float(value_str)
+        #TODO datetime
+        return value_str
+
+class DataSet:
+    """One configured dataset: report header name, Excel name and DataValue."""
+
+    def __init__(self, config: dict) -> None:
+        # dataset_template.json uses "name"; "naming" is kept as a fallback.
+        names: dict = config["name"] if "name" in config else config["naming"]
+        self.r_name = names["report"]
+        self.e_name = names.get("excel") or self.r_name  # empty -> report name
+        dtype = config["data_type"]
+        self.data_value: DataValue = DataValue(
+            position=RelativePosition(
+                rows=config["relative_position"]["rows"],
+                col=config["relative_position"]["col"],
+            ),
+            length=config["length"],
+            # DataValue requires a pattern; assumes an optional "regex" key.
+            regex=compile(config.get("regex", r".+")),
+            # The JSON stores type names; DataValue compares against types.
+            dtype={"int": int, "float": float, "str": str}.get(dtype, dtype),
+        )
+
+    def line_position(self, line: str) -> ColumnIndex|None:
+        """
+        Search ``line`` for this dataset's report header (used as a regex).
+        Returns the column index where the match ends, or None if absent.
+        """
+        header_match: Match|None = search(self.r_name, line)
+        return header_match.end() if header_match is not None else None
+
+@dataclass
+class ReportConfig:
+    """Settings for one report: its name, extension, datasets and patterns."""
+    file_extension: str
+    name: str
+    datasets: list[DataSet]
+    data_line_regexes: list[Pattern]  # patterns tried by ILReport._is_header_line
+
+
+class ILReport(ABC):
+    """Base class that reads a report file line by line using a ReportConfig."""
+
+    def __init__(self, file_path: Path, report_config: ReportConfig) -> None:
+        self.in_file_path: Path = file_path
+        self.line_gen = self._line_generator(file_path)
+
+        self.config: ReportConfig = report_config
+        self.name = report_config.name
+
+        self.line_type_history: list[Line] = []
+        self.last_header_line: int|None = None
+
+        # One output list per dataset, keyed by its Excel name.
+        self.data_dict: dict = {
+            header.e_name: []
+            for header in self.config.datasets
+        }
+
+    @staticmethod
+    def _line_generator(file_path: Path):
+        """Yield the file's lines one at a time without loading it whole."""
+        with open(file_path, 'r') as in_file:
+            yield from in_file
+
+    def _add_line_history(self, line: Line, max_history: int = 10):
+        """Record a line type, keeping only the newest ``max_history`` entries."""
+        self.line_type_history.append(line)
+        excess: int = len(self.line_type_history) - max_history
+        if excess > 0:
+            # One slice deletion instead of repeated O(n) pop(0) calls.
+            del self.line_type_history[:excess]
+
+    def _is_header_line(self, line: str) -> bool:
+        """Checks whether a report line has data headers."""
+        return any(
+            search(regex, line) is not None
+            for regex in self.config.data_line_regexes
+        )
+
+    @abstractmethod
+    def _skip_line(self, line) -> bool:
+        """Tells whether we should skip this line."""
+
+    @abstractmethod
+    def _process_line(self):
+        """
+        Subclass hook for processing a line (contract not defined here).
+        """
+
+    @abstractmethod
+    def _process_dataline(self, dataline: str):
+        """
+        Subclass hook for processing one data line of the report.
+        """
+        
+# Search the row for a data set name, or list of data set names
+# extract all the data until the next row
+
+if __name__ == "__main__":
+    datasets = []  # placeholder: running the module directly does nothing yet
\ No newline at end of file
diff --git a/src/il_extract.py b/src/il_extract.py
index 2dae845..7d50914 100644
--- a/src/il_extract.py
+++ b/src/il_extract.py
@@ -14,7 +14,7 @@ with open("settings.json") as s:
     #if settings["debug"]:
     basicConfig(filename='debug.log', mode='w', encoding='utf-8', level=DEBUG)
 
-debug("\n\n\n########################### VERSION = 3.10 ###########################\n\n\n")
+debug("\n\n\n########################### VERSION = 3.2 ###########################\n\n\n")
 debug("Running main.py...")
 class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
     """
diff --git a/src/ui_ile_main_window.py b/src/ui_ile_main_window.py
index 7e5df2c..2733def 100644
--- a/src/ui_ile_main_window.py
+++ b/src/ui_ile_main_window.py
@@ -1,3 +1,6 @@
+"""
+The user interface set up for the main window of the application
+"""
 # -*- coding: utf-8 -*-
 
 # Form implementation generated from reading ui file 'ILE_MainWindow.ui'