Compare commits
4 Commits
5067678a8c ... 5b03a6c7a9
| Author | SHA1 | Date |
|---|---|---|
| | 5b03a6c7a9 | 3 years ago |
| | 1bb12c722a | 3 years ago |
| | 5caaf3d7ac | 3 years ago |
| | a3905d118e | 3 years ago |
10 image files changed or removed (previous sizes: 1.1 KiB, 2.6 KiB, 477 B, 6.9 KiB, 18 KiB, 819 B, 512 B, 568 B, 1.2 KiB, 3.1 KiB).
@@ -0,0 +1 @@
name = "Test Name"
@@ -0,0 +1 @@
{"debug": true, "consolidatedBasePath": ".", "defaultLocations": {"ach": "Z:/shared/Business Solutions/Griff/Code/InfoLeaseExtract/2023/2023.05/2023.05.24/ACH", "disp": "", "gl": "", "lb": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "minv": "", "niv": "", "ren": "", "pymt": "Z:/Business Solutions/Griff/Code/InfoLeaseExtract/InputFiles", "uap": "", "pastdue": ""}}
@@ -0,0 +1,14 @@
debug = true
consolidatedBasePath = '\\leafnow.com\shared\Accounting\CASH APPS\2023'

[defaultLocations]
ach = ''
disp = ''
gl = ''
lb = ''
minv = ''
niv = ''
ren = ''
pymt = ''
uap = ''
pastdue = ''
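A minimal sketch (not part of the diff) of how a TOML config in the shape above could be loaded, assuming Python 3.11+ for `tomllib`; the file name `config.toml` is hypothetical.

```python
import tomllib  # stdlib TOML parser, Python 3.11+

# "config.toml" is a hypothetical name for the file added above
with open("config.toml", "rb") as f:  # tomllib requires binary mode
    cfg = tomllib.load(f)

print(cfg["debug"])                     # True
print(cfg["defaultLocations"]["pymt"])  # '' until a path is filled in
```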
@@ -0,0 +1,12 @@
{
    "name": {
        "report": "",
        "excel": ""
    },
    "relative_position": {
        "rows": 0,
        "col": 0
    },
    "length": 0,
    "data_type": "int"
}
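A minimal sketch (not part of the diff) of reading the field-definition JSON above; the file name `field.json` is hypothetical, and the keys are taken from the snippet.

```python
import json

# "field.json" is a hypothetical name for the file added above
with open("field.json", "r", encoding="utf-8") as f:
    field_cfg = json.load(f)

report_header = field_cfg["name"]["report"]           # header text to search for
row_offset = field_cfg["relative_position"]["rows"]   # rows below the header line
col_index = field_cfg["relative_position"]["col"]     # starting column index
width = field_cfg["length"]                           # characters to slice
dtype_name = field_cfg["data_type"]                   # e.g. "int"
```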
@@ -0,0 +1,184 @@
from typing import TypeAlias, TypeVar
from dataclasses import dataclass
from pathlib import Path
from abc import ABC, abstractmethod
from re import search, compile, Match, Pattern
from enum import Enum, auto

ColumnIndex: TypeAlias = int
Money: TypeAlias = float

Numeric = TypeVar("Numeric", float, int)


class Line(Enum):
    Header = auto()
    Data = auto()
    Erroneous = auto()
    Top = auto()
    Bottom = auto()


@dataclass
class RelativePosition:
    """
    Coordinates for navigating from one point in a row to another.
    """
    rows: int
    col: ColumnIndex

@dataclass
class DataValue:

    position: RelativePosition
    length: int
    regex: Pattern
    dtype: type

    def correct_line(self, adj_lines_since_header: int) -> bool:
        """
        Returns True when the adjusted line count since the header is a
        whole multiple of this value's row offset.
        """
        return adj_lines_since_header % self.position.rows == 0

    def _line_slice(self, line: str) -> str | None:
        """
        Attempts to get the data from the line.
        Returns the string at the expected position or None if out of range.
        """
        try:
            start: int = self.position.col
            end: int = start + self.length
            line_slice: str = line[start:end]
        except IndexError:
            # TODO: Add logging
            line_slice = None
        finally:
            return line_slice

    @staticmethod
    def _to_float(number_str: str) -> float | None:
        try:
            f_value: float = float(number_str.replace(',', ''))
            return f_value
        except ValueError:
            return None

    def extract(self, line: str) -> float | str | None:
        """
        Slices the expected character range out of a data line, applies the
        regex, and converts the result for numeric types.
        """
        line_slice: str | None = self._line_slice(line)
        if line_slice is None:
            return None

        value_match: Match | None = search(self.regex, line_slice)
        if value_match is None:
            return None

        value_str: str = value_match.group()

        value_str = value_str.strip()
        if self.dtype == int or self.dtype == float:
            return self._to_float(value_str)
        # TODO: datetime
        return value_str

class DataSet:

    def __init__(self, config: dict) -> None:
        self.r_name = config["naming"]["report"]
        try:
            self.e_name = config["naming"]["excel"]
        except KeyError:
            self.e_name = self.r_name

        self.data_value: DataValue = DataValue(
            position=RelativePosition(
                rows=config["relative_position"]["rows"],
                col=config["relative_position"]["col"]
            ),
            length=config["length"],
            # The config stores data_type as a string and has no regex key;
            # the fallback pattern and the name-to-type mapping below are
            # assumptions, not part of the original diff.
            regex=compile(config.get("regex", r"\S+")),
            dtype={"int": int, "float": float, "str": str}.get(
                config["data_type"], str
            ),
        )

    def line_position(self, line: str) -> ColumnIndex | None:
        """
        Searches a line for the report header for this dataset.

        Returns:
            - ColumnIndex (int) | None: the column index of the match's end
              position, or None if no match was found.
        """
        header_match: Match | None = search(self.r_name, line)
        return header_match.end() if isinstance(header_match, Match) else None

@dataclass
class ReportConfig:

    file_extension: str
    name: str
    datasets: list[DataSet]
    data_line_regexes: list[Pattern]


class ILReport(ABC):

    def __init__(self, file_path: Path, report_config: ReportConfig) -> None:
        self.in_file_path: Path = file_path
        self.line_gen = self._line_generator(file_path)

        self.config: ReportConfig = report_config
        self.name = report_config.name

        self.line_type_history: list[Line] = []
        self.last_header_line: int | None = None

        self.data_dict: dict = {
            header.e_name: []
            for header in self.config.datasets
        }

    @staticmethod
    def _line_generator(file_path: Path):
        with open(file_path, 'r') as in_file:
            line: str
            for line in in_file:
                yield line

    def _add_line_history(self, line: Line, max_history: int = 10):
        self.line_type_history.append(line)
        while len(self.line_type_history) > max_history:
            self.line_type_history.pop(0)

    def _is_header_line(self, line: str) -> bool:
        """
        Checks whether a report line has data headers.
        """
        regex: Pattern
        for regex in self.config.data_line_regexes:
            if isinstance(search(regex, line), Match):
                return True
        return False

    @abstractmethod
    def _skip_line(self, line) -> bool:
        """
        Tells whether we should skip this line.
        """

    @abstractmethod
    def _process_line(self):
        """
        Processes the next line from the report.
        """

    @abstractmethod
    def _process_dataline(self, dataline: str):
        """
        Extracts values from a single data line.
        """

        # Search the row for a data set name, or list of data set names
        # extract all the data until the next row

if __name__ == "__main__":
    datasets = []
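A minimal sketch (not part of the diff) of how the `DataValue` and `RelativePosition` classes above might be exercised; the field layout, regex, and sample line are made up for illustration.

```python
from re import compile

# Hypothetical layout: a numeric field 20 characters in, 10 characters wide
value = DataValue(
    position=RelativePosition(rows=1, col=20),
    length=10,
    regex=compile(r"[\d,]+\.\d{2}"),
    dtype=float,
)

sample_line = "TOTAL DUE".ljust(20) + "12,345.67"
print(value.extract(sample_line))  # 12345.67
```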
@@ -1 +0,0 @@
{"debug": false, "consolidatedBasePath": "leafnow.com/shared/cashapps", "defaultLocations": {"ach": "", "disp": "", "gl": "", "lb": "", "minv": "", "niv": "", "ren": "", "pymt": "", "uap": "", "pastdue": ""}}