Compare commits
2 Commits
9ad5e9180c
...
fa7f1516c8
| Author | SHA1 | Date |
|---|---|---|
|
|
fa7f1516c8 | 3 years ago |
|
|
f6245a3413 | 3 years ago |
@ -1,31 +0,0 @@ |
||||
output_columns = [ |
||||
"contract_number", |
||||
"vendor_name", |
||||
"AppNum", # OB only |
||||
"DateBooked", # OB only |
||||
"Document Number",# GP Only |
||||
"Resolution", |
||||
"Notes" |
||||
# 'Source' added for 'no match' |
||||
] |
||||
|
||||
[gp_filters] |
||||
# These regex will be combined and with ORs and used to filer |
||||
# the document number column of the GP report |
||||
doc_num_filters = [ |
||||
"p(oin)?ts", |
||||
"pool", |
||||
"promo", |
||||
"o(ver)?f(und)?", |
||||
"m(ar)?ke?t", |
||||
"title", |
||||
"adj", |
||||
"reg free", |
||||
"cma" |
||||
] |
||||
po_filter = "^(?!.*cma(\\s|\\d)).*$" |
||||
|
||||
[shared_columns] |
||||
contract_number = { GP = "Transaction Description", OB = "Contract"} |
||||
onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" } |
||||
vendor_name = { GP = "Vendor Name", OB = "DealerName"} |
||||
@ -0,0 +1,6 @@ |
||||
from typing import TypeVar, Literal |
||||
from enum import Enum |
||||
|
||||
class ReportSource(Enum):
    """Identifies which report a value or file belongs to."""

    # Each member's value is the same string as its name.
    OB = "OB"
    GP = "GP"
||||
@ -0,0 +1,186 @@ |
||||
from tomllib import load as t_load |
||||
from json import load as j_load |
||||
from pathlib import Path |
||||
from dataclasses import dataclass |
||||
from typing import TypedDict |
||||
from re import Pattern, compile |
||||
|
||||
from src import ReportSource |
||||
|
||||
|
||||
Regex = str | Pattern |
||||
|
||||
|
||||
class ReportConfigError(Exception):
    """Raised when a report configuration cannot be used as given."""
||||
|
||||
|
||||
class SharedColumn(TypedDict, total=True):
    """
    Excel/Dataframe column that is shared between both GP & OB

    The keys mirror the entries of the `shared_columns` config section
    (`standardized_name`, `GP`, `OB`), which is how the loaded dicts are
    accessed by their consumers.
    """
    # Unified name used for the column once both reports are loaded
    standardized_name: str
    # Name of the column in the GP report
    GP: str
    # Name of the column in the OB report
    OB: str
||||
|
||||
|
||||
class PathsConfig: |
||||
""" |
||||
Configuration holding the paths to: |
||||
- input_directory: Where to search for new report files |
||||
- gp/ob_glob: regex used to find new OB & GP files in the report location |
||||
- db_path: path to an SQLite database if any |
||||
""" |
||||
|
||||
def __init__(self, in_dir: str, out_dir: str, |
||||
input_regex_dict: dict[str:str] , db_path: str = None) -> None: |
||||
|
||||
self.input_directory: Path = Path(in_dir) |
||||
self.output_directory: Path = Path(out_dir) |
||||
|
||||
self.gp_glob: str = r"*.xlsx" |
||||
self.ob_glob: str = r"*.xlsx" |
||||
|
||||
if db_path is not None: |
||||
self.db_path: Path = Path(db_path) |
||||
|
||||
try: |
||||
self.gp_glob: str = input_regex_dict["GP"] |
||||
self.ob_glob: str = input_regex_dict["OB"] |
||||
except KeyError: |
||||
# Defaulting to newest of any xlsx file! |
||||
# TODO investigate warning |
||||
pass # will remain as *.xlsx |
||||
|
||||
def get_most_recent(self, report_type: ReportSource = None) -> Path|None| tuple[Path|None, Path|None]: |
||||
|
||||
report_files = [] |
||||
report_types = [ReportSource.OB, ReportSource.GP] if report_type is None else [report_type] |
||||
rt: ReportSource |
||||
for rt in report_types: |
||||
match rt: |
||||
case rt.OB: |
||||
file_glob: str = self.ob_glob |
||||
case rt.GP: |
||||
file_glob: str = self.gp_glob |
||||
case _: |
||||
raise NotImplementedError(\ |
||||
f"No regex pattern for report type: {rt}" |
||||
) |
||||
|
||||
files = self.input_directory.glob(file_glob) |
||||
|
||||
# Find the most recently created file |
||||
most_recent_file = None |
||||
most_recent_creation_time = None |
||||
|
||||
file: Path |
||||
for file in files: |
||||
creation_time = file.stat().st_ctime |
||||
if most_recent_creation_time is None or creation_time > most_recent_creation_time: |
||||
most_recent_file = file |
||||
most_recent_creation_time = creation_time |
||||
report_files.append(most_recent_file) |
||||
|
||||
if len(report_files) > 1: |
||||
return report_files |
||||
|
||||
return report_files[0] |
||||
|
||||
def has_database(self) -> tuple[bool, bool]: |
||||
""" |
||||
Returns whether the config has a SQlite database path and |
||||
whether that path exists |
||||
""" |
||||
has_db: bool = isinstance(self.db_path, Path) |
||||
exists: bool = self.db_path.exists() if has_db else False |
||||
return has_db, exists |
||||
|
||||
|
||||
@dataclass |
||||
class ReportConfig: |
||||
|
||||
# Paths to work with |
||||
# - input/output |
||||
# - input discovery regexes |
||||
# - SQLite database path |
||||
paths: PathsConfig |
||||
|
||||
use_mssql: bool |
||||
|
||||
# Work columns are included in finsished columns |
||||
work_columns: list[str] |
||||
finished_columns: list[str] |
||||
|
||||
filters: dict[str:list[Pattern]|Pattern] |
||||
|
||||
# Columns featured in both reports |
||||
# unified col name -> origin report -> origin col name |
||||
# e.g. contract_number -> GP -> Transaction Description |
||||
shared_columns: list[SharedColumn] |
||||
|
||||
@staticmethod |
||||
def from_file(config_path: str|Path) -> 'ReportConfig': |
||||
|
||||
config_path = Path(config_path) if isinstance(config_path, str) else config_path |
||||
|
||||
with open(config_path, "rb") as config_file: |
||||
match config_path.suffix: |
||||
case ".toml": |
||||
c_dict: dict = t_load(config_file) |
||||
case ".json": |
||||
c_dict: dict= j_load(config_file) |
||||
case _: |
||||
raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}") |
||||
|
||||
try: |
||||
|
||||
path_config: PathsConfig = PathsConfig( |
||||
in_dir = c_dict["input_directory"], |
||||
out_dir= c_dict["output_directory"], |
||||
input_regex_dict= c_dict["input_glob_pattern"], |
||||
db_path= c_dict["database_path"] |
||||
) |
||||
|
||||
use_mssql = False #TODO no yet implemented |
||||
|
||||
work_columns = c_dict["work_columns"] |
||||
finished_column = c_dict["finished_column"] |
||||
|
||||
# Create filter dict with compiled regex |
||||
filters_dict : dict = c_dict["filters"] |
||||
filters: dict[str:list[Pattern]|Pattern] = {} |
||||
k: str |
||||
v: Regex|list[Regex] |
||||
for k, v in filters_dict.items(): |
||||
|
||||
if not isinstance(v, Regex) and not isinstance(v, list): |
||||
raise ReportConfigError(f"Filter items must be a valid regex pattern or a list of valid patterns!\ |
||||
{v} ({type(v)}) is not valid!") |
||||
|
||||
# Convert the strings to regex patterns |
||||
if isinstance(v, list): |
||||
filters[k] = [ |
||||
r if isinstance(r, Pattern) |
||||
else compile(r) |
||||
for r in v |
||||
] |
||||
else: |
||||
filters[k] = compile(v) if isinstance(v, Pattern) else v |
||||
|
||||
shared_columns: list[SharedColumn] = c_dict["shared_columns"] |
||||
|
||||
except KeyError as ke: |
||||
raise ReportConfigError(f"Invalid report config!\n{ke}") |
||||
|
||||
return ReportConfig( |
||||
paths= path_config, |
||||
use_mssql= use_mssql, |
||||
work_columns= work_columns, |
||||
finished_columns= finished_column, |
||||
filters= filters, |
||||
shared_columns= shared_columns, |
||||
) |
||||
@ -0,0 +1,33 @@ |
||||
{ |
||||
"input_directory": "/path/to/input/folder", |
||||
"input_glob_pattern": { |
||||
"GP": "*GP*.xlsx", |
||||
"OB": "*OB*.xlsx" |
||||
}, |
||||
"output_directory": "/path/to/output", |
||||
"interactive_inputs": false, |
||||
"use_mssql": false, |
||||
"database_path": "./onhold.db", |
||||
"work_columns": [ |
||||
"Col_A", |
||||
"Col_B" |
||||
], |
||||
"finished_column": [ |
||||
"Notes", |
||||
"Contract Number" |
||||
], |
||||
"filters": { |
||||
"filter_name": [ |
||||
"\\d{7}", |
||||
"\\w+" |
||||
], |
||||
"other_filter": "(OB|GP)$" |
||||
}, |
||||
"shared_columns": [ |
||||
{ |
||||
"standardized_name": "contract_number", |
||||
"GP": "Transaction Description", |
||||
"OB": "ContractNumber" |
||||
} |
||||
] |
||||
} |
||||
@ -0,0 +1,72 @@ |
||||
#### Paths: using '' makes the string 'raw' to avoid escape characters |
||||
|
||||
# Path to the directory to search for input report files |
||||
input_directory = '../Reports' |
||||
# Glob patterns used to discover the newest report files |
||||
input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'} |
||||
# Path to the directory to save the reconciliation work report |
||||
output_directory = '../Output' |
||||
# Fallback to interactive? |
||||
interactive_inputs = false # NOT YET IMPLEMENTED |
||||
|
||||
|
||||
#### DB |
||||
|
||||
# Whether to try using a mssql database |
||||
# NOT YET IMPLEMENTED! |
||||
use_mssql = false |
||||
# Path to the SQLite database used to view/save reconciliations |
||||
database_path = './onhold_reconciliation.db' |
||||
|
||||
|
||||
### Finished rec details |
||||
|
||||
# Columns to add to all 'work' sheets |
||||
# also saved 'Reconcilations' database |
||||
work_columns = [ |
||||
"HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month |
||||
"Resolution" # Text field describing the discrepancy and how it may be resolved |
||||
] |
||||
# Columns to keep on reconciliation 'work' sheets |
||||
finished_column = [ |
||||
"contract_number", |
||||
"vendor_name", |
||||
"AppNum", # OB only |
||||
"Document Number", # GP Only |
||||
"DateBooked", # OB only |
||||
"Document Date", # GP Only |
||||
# 'Source' added for 'no match' |
||||
] |
||||
|
||||
# Any regex filters that might be needed |
||||
[filters] |
||||
# Use label to distinguish a regex set |
||||
doc_num_filters = [ |
||||
"p(oin)?ts", |
||||
"pool", |
||||
"promo", |
||||
"o(ver)?f(und)?", |
||||
"m(ar)?ke?t", |
||||
"title", |
||||
"adj", |
||||
"reg fee", |
||||
"rent", |
||||
"cma" |
||||
] |
||||
po_filter = ["^(?!.*cma(\\s|\\d)).*$"] |
||||
|
||||
# Columns that are featured & expected on both OB & GP |
||||
[[shared_columns]] |
||||
standardized_name = "contract_number" # The name you'd like to use to standardize them |
||||
GP = "Transaction Description" # Column name used in GP |
||||
OB = "Contract" # Column name used in OB |
||||
|
||||
[[shared_columns]] |
||||
standardized_name = "onhold_amount" |
||||
GP = "Current Trx Amount" |
||||
OB = "CurrentOnHold" |
||||
|
||||
[[shared_columns]] |
||||
standardized_name = "vendor_name" |
||||
GP = "Vendor Name" |
||||
OB = "DealerName" |
||||
@ -0,0 +1,40 @@ |
||||
#### Paths: using '' makes the string 'raw' to avoid escape characters |
||||
|
||||
# Path to the directory to search for input report files |
||||
input_directory = '/path/to/input/folder' |
||||
# Glob patterns used to discover the newest report files |
||||
input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'} |
||||
# Path to the directory to save the reconciliation work report |
||||
output_directory = '/path/to/output' |
||||
# Fallback to interactive? |
||||
interactive_inputs = false # NOT YET IMPLEMENTED |
||||
|
||||
|
||||
#### DB |
||||
|
||||
# Whether to try using a mssql database |
||||
# NOT YET IMPLEMENTED! |
||||
use_mssql = false |
||||
# Path to the SQLite database used to view/save reconciliations |
||||
database_path = './onhold.db' |
||||
|
||||
|
||||
### Finished rec details |
||||
|
||||
# Columns to add to all 'work' sheets |
||||
# also saved 'Reconcilations' database |
||||
work_columns = ["Col_A", "Col_B" ] |
||||
# Columns to keep on reconciliation 'work' sheets |
||||
finished_column = [ "Notes", "Contract Number" ] |
||||
|
||||
# Any regex filters that might be needed |
||||
[filters] |
||||
# Use label to distinguish a regex set |
||||
filter_name = [ '\d{7}', '\w+'] |
||||
other_filter = '(OB|GP)$' |
||||
|
||||
# Columns that are featured & expected on both OB & GP |
||||
[[shared_columns]] |
||||
standardized_name = "contract_number" # The name you'd like to use to standardize them |
||||
GP = "Transaction Description" # Column name used in GP |
||||
OB = "ContractNumber" # Column name used in OB |
||||
@ -0,0 +1,5 @@ |
||||
import os |
||||
import sys |
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) |
||||
|
||||
import src |
||||
@ -0,0 +1,73 @@ |
||||
import unittest |
||||
from pathlib import Path |
||||
from re import Pattern, compile |
||||
from .context import src |
||||
from src import config |
||||
from src import ReportSource |
||||
|
||||
class TestReportConfig(unittest.TestCase):
    """Checks config loading and newest-file discovery against the test fixture config."""

    # Built from path parts instead of a backslash literal so the fixture
    # can be opened regardless of the platform's path separator.
    # The path is relative to the working directory the tests are run from.
    CONFIG_FILE = Path("tests", "test_inputs", "TEST_reports_config.toml")

    def setUp(self):
        # Call the static method from_file to create an instance of ReportConfig
        self.report_config = config.ReportConfig.from_file(self.CONFIG_FILE)

    def test_from_file(self):
        """Every attribute of the loaded config matches the fixture file."""
        report_config = self.report_config

        # Assert the values of the attributes in the created instance
        self.assertEqual(report_config.paths.input_directory, Path(r"tests\test_inputs"))
        self.assertEqual(report_config.paths.gp_glob, r'*GP*.xlsx')
        self.assertEqual(report_config.paths.ob_glob, r"*OB*.xlsx")
        self.assertEqual(report_config.paths.output_directory, Path(r"tests\test_outputs"))
        self.assertEqual(report_config.use_mssql, False)
        self.assertEqual(report_config.paths.db_path, Path("./onhold_reconciliation.db"))
        self.assertEqual(report_config.work_columns, ["HideNextMonth", "Resolution"])
        self.assertEqual(report_config.finished_columns, [
            "contract_number",
            "vendor_name",
            "AppNum",
            "Document Number",
            "DateBooked",
            "Document Date",
        ])

        expected_doc_num_filters = [
            compile(pattern) for pattern in (
                r"p(oin)?ts",
                r"pool",
                r"promo",
                r"o(ver)?f(und)?",
                r"m(ar)?ke?t",
                r"title",
                r"adj",
                r"reg fee",
                r"rent",
                r"cma",
            )
        ]
        self.assertEqual(report_config.filters["doc_num_filters"], expected_doc_num_filters)
        self.assertEqual(report_config.filters["po_filter"], [compile(r"^(?!.*cma(\s|\d)).*$")])

        expected_shared_columns = [
            ("contract_number", "Transaction Description", "Contract"),
            ("onhold_amount", "Current Trx Amount", "CurrentOnHold"),
            ("vendor_name", "Vendor Name", "DealerName"),
        ]
        for index, (name, gp_col, ob_col) in enumerate(expected_shared_columns):
            shared = report_config.shared_columns[index]
            self.assertEqual(shared["standardized_name"], name)
            self.assertEqual(shared["GP"], gp_col)
            self.assertEqual(shared["OB"], ob_col)

    def test_get_newest(self):
        """get_most_recent finds the newest fixture report for one type and for both."""
        paths = self.report_config.paths

        newest_ob: Path = paths.get_most_recent(report_type=ReportSource.OB)
        self.assertEqual(newest_ob.name, "April 2023 OB.xlsx")
        newest_gp: Path = paths.get_most_recent(report_type=ReportSource.GP)
        self.assertEqual(newest_gp.name, "April GP.xlsx")

        nob, ngp = paths.get_most_recent()
        self.assertEqual(nob.name, "April 2023 OB.xlsx")
        self.assertEqual(ngp.name, "April GP.xlsx")
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,72 @@ |
||||
#### Paths: using '' makes the string 'raw' to avoid escape characters |
||||
|
||||
# Path to the directory to search for input report files |
||||
input_directory = 'tests\test_inputs' |
||||
# Glob patterns used to discover the newest report files |
||||
input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'} |
||||
# Path to the directory to save the reconciliation work report |
||||
output_directory = 'tests\test_outputs' |
||||
# Fallback to interactive? |
||||
interactive_inputs = false # NOT YET IMPLEMENTED |
||||
|
||||
|
||||
#### DB |
||||
|
||||
# Whether to try using a mssql database |
||||
# NOT YET IMPLEMENTED! |
||||
use_mssql = false |
||||
# Path to the SQLite database used to view/save reconciliations |
||||
database_path = './onhold_reconciliation.db' |
||||
|
||||
|
||||
### Finished rec details |
||||
|
||||
# Columns to add to all 'work' sheets |
||||
# also saved 'Reconcilations' database |
||||
work_columns = [ |
||||
"HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month |
||||
"Resolution" # Text field describing the discrepancy and how it may be resolved |
||||
] |
||||
# Columns to keep on reconciliation 'work' sheets |
||||
finished_column = [ |
||||
"contract_number", |
||||
"vendor_name", |
||||
"AppNum", # OB only |
||||
"Document Number", # GP Only |
||||
"DateBooked", # OB only |
||||
"Document Date", # GP Only |
||||
# 'Source' added for 'no match' |
||||
] |
||||
|
||||
# Any regex filters that might be needed |
||||
[filters] |
||||
# Use label to distinguish a regex set |
||||
doc_num_filters = [ |
||||
"p(oin)?ts", |
||||
"pool", |
||||
"promo", |
||||
"o(ver)?f(und)?", |
||||
"m(ar)?ke?t", |
||||
"title", |
||||
"adj", |
||||
"reg fee", |
||||
"rent", |
||||
"cma" |
||||
] |
||||
po_filter = ['^(?!.*cma(\s|\d)).*$'] |
||||
|
||||
# Columns that are featured & expected on both OB & GP |
||||
[[shared_columns]] |
||||
standardized_name = "contract_number" # The name you'd like to use to standardize them |
||||
GP = "Transaction Description" # Column name used in GP |
||||
OB = "Contract" # Column name used in OB |
||||
|
||||
[[shared_columns]] |
||||
standardized_name = "onhold_amount" |
||||
GP = "Current Trx Amount" |
||||
OB = "CurrentOnHold" |
||||
|
||||
[[shared_columns]] |
||||
standardized_name = "vendor_name" |
||||
GP = "Vendor Name" |
||||
OB = "DealerName" |
||||
@ -1 +1 @@ |
||||
2.0 |
||||
2.1 |
||||
Loading…
Reference in new issue