Finished config implementation and added testing for config classes.

dev
= 3 years ago
parent f6245a3413
commit fa7f1516c8
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 1
      .gitignore
  2. 107
      src/config.py
  3. 44
      src/config_reports.toml
  4. 0
      src/configs/config_logger.toml
  5. 50
      src/configs/report_config_template.json
  6. 72
      src/configs/reports_config.toml
  7. 2
      src/configs/reports_config_template.toml
  8. 5
      tests/context.py
  9. 73
      tests/test_config.py
  10. BIN
      tests/test_inputs/April 2023 OB.xlsx
  11. BIN
      tests/test_inputs/April GP.xlsx
  12. BIN
      tests/test_inputs/April Reconciled Holds.xlsx
  13. 72
      tests/test_inputs/TEST_reports_config.toml

1
.gitignore vendored

@ -12,3 +12,4 @@ ghlib/
*.txt
!version.txt
!tests/test_inputs/*

@ -31,59 +31,64 @@ class PathsConfig:
"""
Configuration holding the paths to:
- input_directory: Where to search for new report files
- gp/ob_regex: regex used to find new OB & GP files in the report location
- gp/ob_glob: regex used to find new OB & GP files in the report location
- db_path: path to an SQLite database if any
"""
def __init__(self, in_dir: str, out_dir: str,
input_regex_dict: dict[str:Regex] , db_path: str = None) -> None:
input_regex_dict: dict[str:str] , db_path: str = None) -> None:
self.input_directory: Path = Path(in_dir)
self.output_directory: Path = Path(out_dir)
self.gp_regex: Pattern = compile("*.xlsx")
self.ob_regex: Pattern = compile("*.xlsx")
self.gp_glob: str = r"*.xlsx"
self.ob_glob: str = r"*.xlsx"
if db_path is not None:
self.db_path: Path = Path(db_path)
try:
self.gp_regex: Pattern = compile(input_regex_dict["GP"])
self.ob_regex: Pattern = compile(input_regex_dict["OB"])
self.gp_glob: str = input_regex_dict["GP"]
self.ob_glob: str = input_regex_dict["OB"]
except KeyError:
# Defaulting to newest of any xlsx file!
# TODO investigate warning
pass # will remain as *.xlsx
def get_most_recent(self, report_type: ReportSource) -> Path|None:
match report_type:
case report_type.OB:
file_regex: Pattern = self.ob_regex
case report_type.GP:
file_regex: Pattern = self.gp_regex
case _:
raise NotImplementedError(\
f"No regex pattern for report type: {report_type}"
)
files = self.input_directory.glob(file_regex)
# Find the most recently created file
most_recent_file = None
most_recent_creation_time = None
file: Path
for file in files:
creation_time = file.stat().st_ctime
if most_recent_creation_time is None or creation_time > most_recent_creation_time:
most_recent_file = file
most_recent_creation_time = creation_time
return most_recent_file
def get_most_recent(self, report_type: ReportSource = None) -> Path|None| tuple[Path|None, Path|None]:
report_files = []
report_types = [ReportSource.OB, ReportSource.GP] if report_type is None else [report_type]
rt: ReportSource
for rt in report_types:
match rt:
case rt.OB:
file_glob: str = self.ob_glob
case rt.GP:
file_glob: str = self.gp_glob
case _:
raise NotImplementedError(\
f"No regex pattern for report type: {rt}"
)
files = self.input_directory.glob(file_glob)
# Find the most recently created file
most_recent_file = None
most_recent_creation_time = None
file: Path
for file in files:
creation_time = file.stat().st_ctime
if most_recent_creation_time is None or creation_time > most_recent_creation_time:
most_recent_file = file
most_recent_creation_time = creation_time
report_files.append(most_recent_file)
if len(report_files) > 1:
return report_files
return report_files[0]
def has_database(self) -> tuple[bool, bool]:
"""
@ -105,7 +110,6 @@ class ReportConfig:
paths: PathsConfig
use_mssql: bool
db_path: Path
# Work columns are included in finsished columns
work_columns: list[str]
@ -123,36 +127,37 @@ class ReportConfig:
config_path = Path(config_path) if isinstance(config_path, str) else config_path
match config_path.suffix:
case ".toml":
c_dict: dict = t_load(config_path)
case ".json":
c_dict: dict= j_load(config_path)
case _:
raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}")
with open(config_path, "rb") as config_file:
match config_path.suffix:
case ".toml":
c_dict: dict = t_load(config_file)
case ".json":
c_dict: dict= j_load(config_file)
case _:
raise NotImplementedError(f"Only json and toml configs are supported not: {config_path.suffix}")
try:
path_config: PathsConfig = PathsConfig(
in_dir = c_dict["input_path"],
out_dir= c_dict["output_path"],
input_regex_dict= c_dict["input_regex"],
db_path= c_dict["db_path"]
in_dir = c_dict["input_directory"],
out_dir= c_dict["output_directory"],
input_regex_dict= c_dict["input_glob_pattern"],
db_path= c_dict["database_path"]
)
use_mssql = False #TODO no yet implemented
work_columns = c_dict["work_columns"]
output_columns = c_dict["output_columns"]
finished_column = c_dict["finished_column"]
# Add create out filter dict
# Create filter dict with compiled regex
filters_dict : dict = c_dict["filters"]
filters: dict[str:list[Pattern]|Pattern] = {}
k: str
v: Regex|list[Regex]
for k, v in filters_dict.items():
if not isinstance(v, Regex|list[Regex]):
if not isinstance(v, Regex) and not isinstance(v, list):
raise ReportConfigError(f"Filter items must be a valid regex pattern or a list of valid patterns!\
{v} ({type(v)}) is not valid!")
@ -175,7 +180,7 @@ class ReportConfig:
paths= path_config,
use_mssql= use_mssql,
work_columns= work_columns,
finished_columns= output_columns,
finished_columns= finished_column,
filters= filters,
shared_columns= shared_columns,
)

@ -1,44 +0,0 @@
output_path = '../Work'
db_path = "OnHold.db"
# Columns added to each 'working' sheet in the new report dataframe
work_columns = [
"HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
"Resolution" # Text field describing the discrepancy and how it may be resolved
]
# List of Columns to show on the 'working' sheets of the rec report
output_columns = [
"contract_number",
"vendor_name",
"AppNum", # OB only
"Document Number",# GP Only
"DateBooked",# OB only
"Document Date", #GP Only
"HideNextMonth",
"Resolution",
# 'Source' added for 'no match'
]
[filters]
# These regexes will be combined with ORs and used to filter
# the document number column of the GP report
doc_num_filters = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg fee",
"rent",
"cma"
]
po_filter = ["^(?!.*cma(\\s|\\d)).*$"]
# Columns that are common to both GP and OB
[shared_columns]
contract_number = { GP = "Transaction Description", OB = "Contract"}
onhold_amount = { GP = "Current Trx Amount", OB = "CurrentOnHold" }
vendor_name = { GP = "Vendor Name", OB = "DealerName"}

@ -1,19 +1,33 @@
{
"input_directory": "",
"output_directory": "",
"use_mssql": false,
"database_path": "",
"work_columns": [],
"finished_column": [],
"filters": {
"filter_name": [],
"other_filter": ""
},
"shared_columns": [
{
"standardized_name": "",
"GP": "",
"OB": ""
}
]
}
"input_directory": "/path/to/input/folder",
"input_glob_pattern": {
"GP": "*GP*.xlsx",
"OB": "*OB*.xlsx"
},
"output_directory": "/path/to/output",
"interactive_inputs": false,
"use_mssql": false,
"database_path": "./onhold.db",
"work_columns": [
"Col_A",
"Col_B"
],
"finished_column": [
"Notes",
"Conctract Number"
],
"filters": {
"filter_name": [
"\\d{7}",
"\\w+"
],
"other_filter": "(OB|GP)$"
},
"shared_columns": [
{
"standardized_name": "contract_number",
"GP": "Transactoin Description",
"OB": "ContractNumber"
}
]
}

@ -0,0 +1,72 @@
#### Paths: using '' makes the string 'raw' to avoid escape characters
# Path to the directory to search for input report files
input_directory = '../Reports'
# Glob patterns used to discover the newest files
input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
# Path to the directory to save the reconciliation work report
output_directory = '../Output'
# Fallback to interactive?
interactive_inputs = false # NOT YET IMPLEMENTED
#### DB
# Whether to try using a mssql database
# NOT YET IMPLEMENTED!
use_mssql = false
# Path to the SQLite database used to view/save reconciliations
database_path = './onhold_reconciliation.db'
### Finished rec details
# Columns to add to all 'work' sheets
# also saved 'Reconcilations' database
work_columns = [
"HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
"Resolution" # Text field describing the discrepancy and how it may be resolved
]
# Columns to keep on reconciliation 'work' sheets
finished_column = [
"contract_number",
"vendor_name",
"AppNum", # OB only
"Document Number", # GP Only
"DateBooked", # OB only
"Document Date", # GP Only
# 'Source' added for 'no match'
]
# Any regex filters that might be needed
[filters]
# Use label to distinguish a regex set
doc_num_filters = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg fee",
"rent",
"cma"
]
po_filter = ["^(?!.*cma(\\s|\\d)).*$"]
# Columns that are featured & expected on both OB & GP
[[shared_columns]]
standardized_name = "contract_number" # The name you'd like to use to standardize them
GP = "Transaction Description" # Column name used in GP
OB = "Contract" # Column name used in OB
[[shared_columns]]
standardized_name = "onhold_amount"
GP = "Current Trx Amount"
OB = "CurrentOnHold"
[[shared_columns]]
standardized_name = "vendor_name"
GP = "Vendor Name"
OB = "DealerName"

@ -3,7 +3,7 @@
# Path to the directory to search for input report files
input_directory = '/path/to/input/folder'
# Glob patterns used to discover the newest files
input_regex = { GP = '*likeThis*.xlsx', OB = '*.csv'}
input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
# Path to the directory to save the reconciliation work report
output_directory = '/path/to/output'
# Fallback to interactive?

@ -0,0 +1,5 @@
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import src

@ -0,0 +1,73 @@
import unittest
from pathlib import Path
from re import Pattern, compile
from .context import src
from src import config
from src import ReportSource
class TestReportConfig(unittest.TestCase):
def test_from_file(self):
# Provide the path to your config file
config_file = Path(r"tests\test_inputs\TEST_reports_config.toml")
# Call the static method from_file to create an instance of ReportConfig
report_config = config.ReportConfig.from_file(config_file)
# Assert the values of the attributes in the created instance
self.assertEqual(report_config.paths.input_directory, Path(r"tests\test_inputs"))
self.assertEqual(report_config.paths.gp_glob, r'*GP*.xlsx')
self.assertEqual(report_config.paths.ob_glob, r"*OB*.xlsx")
self.assertEqual(report_config.paths.output_directory, Path(r"tests\test_outputs"))
self.assertEqual(report_config.use_mssql, False)
self.assertEqual(report_config.paths.db_path, Path("./onhold_reconciliation.db"))
self.assertEqual(report_config.work_columns, ["HideNextMonth", "Resolution"])
self.assertEqual(report_config.finished_columns, [
"contract_number",
"vendor_name",
"AppNum",
"Document Number",
"DateBooked",
"Document Date",
])
self.assertEqual(report_config.filters["doc_num_filters"], [
compile(r"p(oin)?ts",),
compile(r"pool",),
compile(r"promo",),
compile(r"o(ver)?f(und)?",),
compile(r"m(ar)?ke?t",),
compile(r"title",),
compile(r"adj",),
compile(r"reg fee",),
compile(r"rent",),
compile(r"cma",),
])
self.assertEqual(report_config.filters["po_filter"], [compile(r"^(?!.*cma(\s|\d)).*$")])
self.assertEqual(report_config.shared_columns[0]["standardized_name"], "contract_number")
self.assertEqual(report_config.shared_columns[0]["GP"], "Transaction Description")
self.assertEqual(report_config.shared_columns[0]["OB"], "Contract")
self.assertEqual(report_config.shared_columns[1]["standardized_name"], "onhold_amount")
self.assertEqual(report_config.shared_columns[1]["GP"], "Current Trx Amount")
self.assertEqual(report_config.shared_columns[1]["OB"], "CurrentOnHold")
self.assertEqual(report_config.shared_columns[2]["standardized_name"], "vendor_name")
self.assertEqual(report_config.shared_columns[2]["GP"], "Vendor Name")
self.assertEqual(report_config.shared_columns[2]["OB"], "DealerName")
def test_get_newest(self):
# Provide the path to your config file
config_file = Path(r"tests\test_inputs\TEST_reports_config.toml")
# Call the static method from_file to create an instance of ReportConfig
report_config = config.ReportConfig.from_file(config_file)
newest_ob: Path = report_config.paths.get_most_recent(report_type=ReportSource.OB)
self.assertEqual(newest_ob.name, "April 2023 OB.xlsx")
newest_gp: Path = report_config.paths.get_most_recent(report_type=ReportSource.GP)
self.assertEqual(newest_gp.name, "April GP.xlsx")
nob, ngp = report_config.paths.get_most_recent()
self.assertEqual(nob.name, "April 2023 OB.xlsx")
self.assertEqual(ngp.name, "April GP.xlsx")
if __name__ == '__main__':
unittest.main()

Binary file not shown.

@ -0,0 +1,72 @@
#### Paths: using '' makes the string 'raw' to avoid escape characters
# Path to the directory to search for input report files
input_directory = 'tests\test_inputs'
# Glob patterns used to discover the newest files
input_glob_pattern = { GP = "*GP*.xlsx", OB = '*OB*.xlsx'}
# Path to the directory to save the reconciliation work report
output_directory = 'tests\test_outputs'
# Fallback to interactive?
interactive_inputs = false # NOT YET IMPLEMENTED
#### DB
# Whether to try using a mssql database
# NOT YET IMPLEMENTED!
use_mssql = false
# Path to the SQLite database used to view/save reconciliations
database_path = './onhold_reconciliation.db'
### Finished rec details
# Columns to add to all 'work' sheets
# also saved 'Reconcilations' database
work_columns = [
"HideNextMonth", # Boolean column for user to indicate if this contract should be ignored next month
"Resolution" # Text field describing the discrepancy and how it may be resolved
]
# Columns to keep on reconciliation 'work' sheets
finished_column = [
"contract_number",
"vendor_name",
"AppNum", # OB only
"Document Number", # GP Only
"DateBooked", # OB only
"Document Date", # GP Only
# 'Source' added for 'no match'
]
# Any regex filters that might be needed
[filters]
# Use label to distinguish a regex set
doc_num_filters = [
"p(oin)?ts",
"pool",
"promo",
"o(ver)?f(und)?",
"m(ar)?ke?t",
"title",
"adj",
"reg fee",
"rent",
"cma"
]
po_filter = ['^(?!.*cma(\s|\d)).*$']
# Columns that are featured & expected on both OB & GP
[[shared_columns]]
standardized_name = "contract_number" # The name you'd like to use to standardize them
GP = "Transaction Description" # Column name used in GP
OB = "Contract" # Column name used in OB
[[shared_columns]]
standardized_name = "onhold_amount"
GP = "Current Trx Amount"
OB = "CurrentOnHold"
[[shared_columns]]
standardized_name = "vendor_name"
GP = "Vendor Name"
OB = "DealerName"
Loading…
Cancel
Save