version 4.0 staging | New header relational positioning class structure

back_report_creation 3.2
= 3 years ago
parent a3905d118e
commit 5caaf3d7ac
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 3
      .gitignore
  2. 8
      IL Extract.spec
  3. 1
      report_config_termplate.toml
  4. BIN
      requirements.txt
  5. 12
      src/datasets/dataset_template.json
  6. 184
      src/extractors.py
  7. 2
      src/il_extract.py
  8. 3
      src/ui_ile_main_window.py

3
.gitignore vendored

@ -10,4 +10,5 @@ __pycache__/
*.txt
*.md
!todo.md
!todo.md
!requirements.txt

@ -5,10 +5,10 @@ block_cipher = None
a = Analysis(
['main.py'],
pathex=[],
['src/il_extract.py'],
pathex=['src'],
binaries=[],
datas=[('assets/extract.svg', '.'), ('assets/process.svg', '.'), ('assets/folder.svg', '.'), ('assets/copy.svg', '.'), ('settings.json', '.')],
datas=[('src/assets/*', 'assets'), ('settings.json', '.')],
hiddenimports=[],
hookspath=[],
hooksconfig={},
@ -37,7 +37,7 @@ exe = EXE(
target_arch=None,
codesign_identity=None,
entitlements_file=None,
icon='assets\\extract.ico',
icon='src/assets/extract.ico',
)
coll = COLLECT(
exe,

@ -0,0 +1 @@
name = "Test Name"

Binary file not shown.

@ -0,0 +1,12 @@
{
"name": {
"report": "",
"excel": ""
},
"relative_position": {
"rows": 0,
"col": 0
},
"length": 0,
"data_type": "int"
}

@ -0,0 +1,184 @@
from typing import TypeAlias, TypeVar
from dataclasses import dataclass
from pathlib import Path
import pathlib as pl
from abc import ABC, abstractmethod, abstractproperty
from re import search, match, compile, Match, Pattern
from enum import Enum
# Integer column offset within a line of report text; values of this type are
# used as string indices when slicing a line.
ColumnIndex: TypeAlias = int
# Alias for float (presumably for currency amounts -- not referenced in the
# visible part of this file; confirm intended use).
Money: TypeAlias = float
# Type variable constrained to exactly float or int.
Numeric = TypeVar("Numeric", float, int)
class Line(Enum):
    """
    Classification of a single line of report text.

    Every member is given an explicit string value. A bare annotation such
    as ``Header: str`` only records a type hint and creates no enum member,
    which would make ``Line.Header`` raise ``AttributeError``.
    """
    Header = "header"
    Data = "data"
    Erroneous = "erroneous"
    Top = "top"
    Bottom = "bottom"
@dataclass
class RelativePosition:
    """
    Offset for moving from one point in a report to another.
    """
    # Row component of the offset.
    rows: int
    # Column component of the offset.
    col: ColumnIndex
@dataclass
class DataValue:
position: RelativePosition
length : int
regex: Pattern
dtype: type
def correct_line(self, adj_lines_since_header: int) -> bool:
"""
"""
return adj_lines_since_header % self.position.rows == 0
def _line_slice(self, line: Line.Data) -> str|None:
"""
Attempts to get the data from the line.
Returns string in correct postion or None if out of range.
"""
try:
start: int = self.position.col
end: int = start + self.length
line_slice: str = line[start:end]
except IndexError:
#TODO: Add logging
line_slice = None
finally:
return line_slice
@staticmethod
def _to_float(number_str: str) -> float|None:
try:
f_value:float = float(number_str.replace(',',''))
return f_value
except:
return None
def extract(self, line: Line.Data) -> type|None:
"""
"""
line_slice: str|None = self._line_slice(line)
if isinstance(line_slice, None):
return None
value_match: Match|None = search(self.regex, line_slice)
if isinstance(value_match, None):
return None
value_str: str = value_match.group()
value_str.strip()
if self.dtype == int or self.dtype == float:
return self._to_float(value_str)
#TODO datetime
return value_str
class DataSet:
    """
    One named value to pull out of a report, built from a config mapping.

    Attributes:
        r_name: Name of the value as it appears in the report; also used
            as the search pattern in ``line_position``.
        e_name: Name used for the Excel side; falls back to ``r_name``.
        data_value: Position and parsing rules for the value.
    """
    # Type names accepted in a config's "data_type" entry.
    _DTYPES = {"int": int, "float": float, "str": str}
    # Used when the config has no "regex" entry: the slice content without
    # surrounding whitespace.
    # NOTE(review): default chosen here, confirm it suits the reports.
    _DEFAULT_REGEX = r"\S(?:.*\S)?"

    def __init__(self, config: dict) -> None:
        """
        Build the dataset from its config mapping.

        Args:
            config: Mapping with the keys "naming" (or "name"),
                "relative_position", "length" and "data_type", plus an
                optional "regex".

        Raises:
            KeyError: When a required key is missing.
        """
        # Accept "name" as well as "naming" for the names section.
        names: dict = config["naming"] if "naming" in config else config["name"]
        self.r_name = names["report"]
        # A missing or empty Excel name falls back to the report name.
        self.e_name = names.get("excel") or self.r_name
        offset: dict = config["relative_position"]
        raw_dtype = config["data_type"]
        self.data_value: DataValue = DataValue(
            position=RelativePosition(rows=offset["rows"], col=offset["col"]),
            length=config["length"],
            # DataValue requires a pattern; omitting it raises TypeError.
            regex=compile(config.get("regex", self._DEFAULT_REGEX)),
            # Configs name the type as text; unknown values pass through.
            dtype=self._DTYPES.get(raw_dtype, raw_dtype),
        )

    def line_position(self, line: str) -> ColumnIndex | None:
        """
        Searches a line for the report header for this dataset.

        ``r_name`` is passed to ``re.search``, so it is treated as a
        regular expression.

        Returns:
            - ColumnIndex(int) | None: The column index of the match's end
              position, or None if no match was found.
        """
        header_match: Match | None = search(self.r_name, line)
        return None if header_match is None else header_match.end()
@dataclass
class ReportConfig:
    """
    Settings that describe one kind of report.
    """
    # File extension associated with the report.
    file_extension: str
    # Name of the report.
    name: str
    # Datasets to read from the report.
    datasets: list[DataSet]
    # Patterns searched for in each report line.
    data_line_regexes: list[Pattern]
class ILReport(ABC):
    """
    Base class for reading a report file line by line.

    Subclasses supply the line-handling logic through the abstract methods.
    """

    def __init__(self, file_path: Path, report_config: ReportConfig) -> None:
        """
        Set up reader state for one report file.

        Args:
            file_path: Path of the report file to read.
            report_config: Configuration for this report.
        """
        self.in_file_path: Path = file_path
        # Generator: the file is only opened once iteration starts.
        self.line_gen = self._line_generator(file_path)
        self.config: ReportConfig = report_config
        self.name = report_config.name
        self.line_type_history: list[Line] = []
        self.last_header_line: int | None = None
        # One empty result list per dataset, keyed by its Excel name.
        self.data_dict: dict = {
            dataset.e_name: [] for dataset in self.config.datasets
        }

    @staticmethod
    def _line_generator(file_path: Path):
        """
        Yield the lines of ``file_path`` one at a time.

        The file object is iterated directly, so lines are read as they
        are consumed instead of being loaded into a list first.
        """
        with open(file_path, 'r') as in_file:
            yield from in_file

    def _add_line_history(self, line: Line, max_history: int = 10):
        """
        Record a line type, keeping only the newest ``max_history`` entries.
        """
        self.line_type_history.append(line)
        overflow: int = len(self.line_type_history) - max_history
        if overflow > 0:
            # Drop the oldest entries in one step.
            del self.line_type_history[:overflow]

    def _is_header_line(self, line: str) -> bool:
        """
        Checks whether a report line has data headers.

        Returns:
            True when any pattern in ``config.data_line_regexes`` is found
            in the line.
        """
        return any(
            search(regex, line) is not None
            for regex in self.config.data_line_regexes
        )

    @abstractmethod
    def _skip_line(self, line) -> bool:
        """
        Tells whether we should skip this line
        """

    @abstractmethod
    def _process_line(self):
        """
        Handle one report line; behaviour is defined by the subclass.
        """

    @abstractmethod
    def _process_dataline(self, dataline: str):
        """
        Handle one data line; behaviour is defined by the subclass.
        """
# Search the row for a data set name, or list of data set names
# extract all the data until the next row
if __name__ == "__main__":
    # Placeholder entry point: only creates an empty list when run directly.
    datasets = []

@ -14,7 +14,7 @@ with open("settings.json") as s:
#if settings["debug"]:
basicConfig(filename='debug.log', mode='w', encoding='utf-8', level=DEBUG)
debug("\n\n\n########################### VERSION = 3.10 ###########################\n\n\n")
debug("\n\n\n########################### VERSION = 3.2 ###########################\n\n\n")
debug("Running main.py...")
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
"""

@ -1,3 +1,6 @@
"""
The user interface set up for the main window of the application
"""
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'ILE_MainWindow.ui'

Loading…
Cancel
Save