parent
a3905d118e
commit
5caaf3d7ac
@ -0,0 +1 @@ |
||||
name = "Test Name" |
||||
Binary file not shown.
@ -0,0 +1,12 @@ |
||||
{ |
||||
"name": { |
||||
"report": "", |
||||
"excel": "" |
||||
}, |
||||
"relative_position": { |
||||
"rows": 0, |
||||
"col": 0 |
||||
}, |
||||
"length": 0, |
||||
"data_type": "int" |
||||
} |
||||
@ -0,0 +1,184 @@ |
||||
from typing import TypeAlias, TypeVar |
||||
from dataclasses import dataclass |
||||
from pathlib import Path |
||||
import pathlib as pl |
||||
from abc import ABC, abstractmethod, abstractproperty |
||||
from re import search, match, compile, Match, Pattern |
||||
from enum import Enum |
||||
|
||||
ColumnIndex: TypeAlias = int |
||||
Money: TypeAlias = float |
||||
|
||||
Numeric = TypeVar("Numeric", float, int) |
||||
|
||||
class Line(Enum):
    """Kinds of line that can be recorded for a report file.

    Each member carries a lowercase string value. Members must be assigned
    a value: a bare annotation such as ``Header: str`` does not create an
    Enum member, so ``Line.Header`` would raise ``AttributeError``.
    """

    Header = "header"
    Data = "data"
    Erroneous = "erroneous"
    Top = "top"
    Bottom = "bottom"
||||
|
||||
|
||||
|
||||
@dataclass
class RelativePosition:
    """Coordinates for navigating from one point in a report to another.

    Holds a row component and a column component; both are plain integers.
    """

    # Row component of the offset.
    rows: int
    # Column component of the offset.
    col: ColumnIndex
||||
|
||||
@dataclass |
||||
class DataValue: |
||||
|
||||
position: RelativePosition |
||||
length : int |
||||
regex: Pattern |
||||
dtype: type |
||||
|
||||
def correct_line(self, adj_lines_since_header: int) -> bool: |
||||
""" |
||||
""" |
||||
return adj_lines_since_header % self.position.rows == 0 |
||||
|
||||
def _line_slice(self, line: Line.Data) -> str|None: |
||||
""" |
||||
Attempts to get the data from the line. |
||||
Returns string in correct postion or None if out of range. |
||||
""" |
||||
try: |
||||
start: int = self.position.col |
||||
end: int = start + self.length |
||||
line_slice: str = line[start:end] |
||||
except IndexError: |
||||
#TODO: Add logging |
||||
line_slice = None |
||||
finally: |
||||
return line_slice |
||||
|
||||
@staticmethod |
||||
def _to_float(number_str: str) -> float|None: |
||||
try: |
||||
f_value:float = float(number_str.replace(',','')) |
||||
return f_value |
||||
except: |
||||
return None |
||||
|
||||
def extract(self, line: Line.Data) -> type|None: |
||||
""" |
||||
""" |
||||
line_slice: str|None = self._line_slice(line) |
||||
if isinstance(line_slice, None): |
||||
return None |
||||
|
||||
value_match: Match|None = search(self.regex, line_slice) |
||||
if isinstance(value_match, None): |
||||
return None |
||||
|
||||
value_str: str = value_match.group() |
||||
|
||||
value_str.strip() |
||||
if self.dtype == int or self.dtype == float: |
||||
return self._to_float(value_str) |
||||
#TODO datetime |
||||
return value_str |
||||
|
||||
class DataSet:
    """One named data set of a report, built from a configuration mapping.

    Keys read from ``config``:
        - ``"naming"`` (or ``"name"``): mapping with ``"report"`` and an
          optional ``"excel"`` entry.
        - ``"relative_position"``: mapping with ``"rows"`` and ``"col"``.
        - ``"length"``: width of the value's field.
        - ``"data_type"``: type name such as ``"int"``.
        - ``"regex"`` (optional): pattern used to locate the value.
    """

    # Translates ``data_type`` strings into the Python types that
    # ``DataValue.extract`` compares against.
    _DTYPES = {"int": int, "float": float, "str": str}
    # NOTE(review): ``DataValue`` requires a regex but no config key for one
    # is visible; this fallback matches the non-blank content of the field.
    # Confirm the intended key name and default.
    _DEFAULT_REGEX = r"\S(?:.*\S)?"

    def __init__(self, config: dict) -> None:
        """Build the data set from ``config``.

        Raises:
            KeyError: If a required key is missing from ``config``.
        """
        # The JSON template uses "name" while this code historically read
        # "naming"; accept either, preferring "naming".
        naming: dict = config["naming"] if "naming" in config else config["name"]
        self.r_name = naming["report"]
        # A missing or empty Excel name falls back to the report name.
        self.e_name = naming.get("excel") or self.r_name

        position_cfg: dict = config["relative_position"]
        raw_dtype = config["data_type"]
        self.data_value: DataValue = DataValue(
            position=RelativePosition(
                rows=position_cfg["rows"],
                col=position_cfg["col"],
            ),
            length=config["length"],
            regex=compile(config.get("regex", self._DEFAULT_REGEX)),
            # Unknown names (or values that are already types) pass through.
            dtype=self._DTYPES.get(raw_dtype, raw_dtype),
        )

    def line_position(self, line: str) -> ColumnIndex | None:
        """Search a line for the report header of this data set.

        ``r_name`` is handed to ``re.search`` and is therefore interpreted
        as a regular expression, not as literal text.

        Returns:
            The column index of the match's end position, or None if no
            match was found.
        """
        header_match: Match | None = search(self.r_name, line)
        return None if header_match is None else header_match.end()
||||
|
||||
@dataclass
class ReportConfig:
    """Settings bundle handed to a report reader."""

    # File extension associated with the report.
    file_extension: str
    # Name of the report; copied onto the reader as ``name``.
    name: str
    # Data sets of the report; their ``e_name`` values key the reader's
    # ``data_dict``.
    datasets: list[DataSet]
    # Patterns tried against each line by ``ILReport._is_header_line``.
    data_line_regexes: list[Pattern]
||||
|
||||
|
||||
class ILReport(ABC):
    """Abstract base for line-oriented report readers.

    Holds the input path, a lazy line generator over that file, the report
    configuration, a bounded history of line types and one result list per
    configured data set. Subclasses supply the line handling.
    """

    def __init__(self, file_path: Path, report_config: ReportConfig) -> None:
        """Store the inputs and prepare empty result containers.

        Args:
            file_path: Path of the report file to read.
            report_config: Configuration describing the report.
        """
        self.in_file_path: Path = file_path
        # The file is not opened until the generator is first advanced.
        self.line_gen = self._line_generator(file_path)

        self.config: ReportConfig = report_config
        self.name = report_config.name

        self.line_type_history: list[Line] = []
        self.last_header_line: int | None = None

        # One empty list per data set, keyed by its Excel name.
        self.data_dict: dict = {
            dataset.e_name: [] for dataset in self.config.datasets
        }

    @staticmethod
    def _line_generator(file_path: Path):
        """Yield the lines of ``file_path`` one at a time.

        The file object is iterated directly so the whole file is never
        held in memory at once.
        """
        with open(file_path, 'r') as in_file:
            yield from in_file

    def _add_line_history(self, line: Line, max_history: int = 10):
        """Append ``line`` to the history, keeping the newest entries.

        Args:
            line: Line type to record.
            max_history: Maximum number of entries retained.
        """
        self.line_type_history.append(line)
        overflow = len(self.line_type_history) - max_history
        if overflow > 0:
            # Drop the oldest entries in a single step.
            del self.line_type_history[:overflow]

    def _is_header_line(self, line: str) -> bool:
        """Check whether a report line has data headers.

        Returns:
            True if any pattern in ``config.data_line_regexes`` is found in
            ``line``, otherwise False.
        """
        return any(
            search(regex, line) is not None
            for regex in self.config.data_line_regexes
        )

    @abstractmethod
    def _skip_line(self, line) -> bool:
        """
        Tells whether we should skip this line
        """

    @abstractmethod
    def _process_line(self):
        """
        Line-processing hook; the contract is left to subclasses.
        """

    @abstractmethod
    def _process_dataline(self, dataline: str):
        """
        Data-line hook; the contract is left to subclasses.
        """
||||
|
||||
# Search the row for a data set name, or list of data set names |
||||
# extract all the data until the next row |
||||
|
||||
if __name__ == "__main__":
    # Placeholder entry point: only creates an empty list for now.
    datasets = list()
||||
Loading…
Reference in new issue