|
|
|
@@ -1,17 +1,54 @@
|
|
|
from pandas import DataFrame, merge, to_datetime, NaT, concat, Series |
|
|
|
from pandas import DataFrame, merge, to_datetime, NaT, concat, ExcelWriter |
|
|
|
from numpy import concatenate |
|
|
|
from openpyxl import Workbook, load_workbook |
|
|
|
from abc import ABC, abstractmethod |
|
|
|
from abc import ABC |
|
|
|
from logging import getLogger |
|
|
|
from logging import getLogger |
|
|
|
import re |
|
|
|
import re |
|
|
|
from typing import Literal |
|
|
|
|
|
|
|
import datetime |
|
|
|
import datetime |
|
|
|
from copy import deepcopy |
|
|
|
from copy import deepcopy |
|
|
|
|
|
|
|
from dataclasses import dataclass |
|
|
|
from helpers import CN_REGEX, drop_unnamed |
|
|
|
from helpers import CN_REGEX, drop_unnamed |
|
|
|
from memory import get_prev_reconciled |
|
|
|
from memory import get_prev_reconciled, hash_cols, col_hash, create_identifier |
|
|
|
|
|
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
|
|
|
logger = getLogger(__name__) |
|
|
|
logger = getLogger(__name__) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass |
|
|
|
|
|
|
|
class ReconciledReports: |
|
|
|
|
|
|
|
no_match: DataFrame |
|
|
|
|
|
|
|
amt_mismatch: DataFrame |
|
|
|
|
|
|
|
prev_rec: DataFrame |
|
|
|
|
|
|
|
gp_filtered: DataFrame |
|
|
|
|
|
|
|
ob_overdue: DataFrame |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_reports(self, output_path: Path): |
|
|
|
|
|
|
|
with ExcelWriter(output_path, mode='w') as writer: |
|
|
|
|
|
|
|
self.no_match.drop_duplicates(inplace=True) |
|
|
|
|
|
|
|
self.no_match.to_excel(writer, sheet_name="No Match", |
|
|
|
|
|
|
|
index=False, freeze_panes=(1,3) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
self.amt_mismatch.drop_duplicates(inplace=True) |
|
|
|
|
|
|
|
self.amt_mismatch.to_excel(writer, sheet_name="Amount Mismatch", |
|
|
|
|
|
|
|
index=False, freeze_panes=(1,3) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
self.ob_overdue.to_excel(writer, sheet_name="Overdue", |
|
|
|
|
|
|
|
index=False |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
self.prev_rec.to_excel(writer, sheet_name="Previously Reconciled", |
|
|
|
|
|
|
|
index=False, freeze_panes=(1,3) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
self.gp_filtered.to_excel(writer, sheet_name="Filtered from GP", |
|
|
|
|
|
|
|
index=False, freeze_panes=(1,0) |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wb: Workbook = load_workbook(output_path) |
|
|
|
|
|
|
|
for sheet in ["No Match", "Amount Mismatch"]: |
|
|
|
|
|
|
|
ws = wb[sheet] |
|
|
|
|
|
|
|
ws.column_dimensions['A'].hidden = True |
|
|
|
|
|
|
|
ws.column_dimensions['B'].hidden = True |
|
|
|
|
|
|
|
for sheet in ["Filtered from GP", "Previously Reconciled"]: |
|
|
|
|
|
|
|
wb[sheet].sheet_state = "hidden" |
|
|
|
|
|
|
|
wb.save(output_path) |
|
|
|
|
|
|
|
wb.close() |
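
A minimal smoke test for the new dataclass, using empty frames purely to exercise the writer (not part of the diff; the column names are placeholders):

from pandas import DataFrame
from pathlib import Path

empty = DataFrame(columns=["ID_GP", "ID_OB", "Resolution"])
reports = ReconciledReports(
    no_match=empty,
    amt_mismatch=empty,
    prev_rec=empty,
    gp_filtered=empty,
    ob_overdue=empty,
)
reports.save_reports(Path("reconciliation.xlsx"))
# Writes five sheets; "Filtered from GP" and "Previously Reconciled" come out hidden.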
|
|
|
|
|
|
|
|
|
|
|
class HoldReport(ABC): |
|
|
|
class HoldReport(ABC): |
|
|
|
|
|
|
|
|
|
|
|
@@ -21,9 +58,8 @@ class HoldReport(ABC):
|
|
|
self.config = reports_config |
|
|
|
self.config = reports_config |
|
|
|
drop_unnamed(dataframe) |
|
|
|
drop_unnamed(dataframe) |
|
|
|
self.df = dataframe |
|
|
|
self.df = dataframe |
|
|
|
self.prev_rec = None |
|
|
|
self.df = self._add_work_columns(self.df) |
|
|
|
self._normalize() |
|
|
|
self._normalize() |
|
|
|
self._previsouly_resolved() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize(self): |
|
|
|
def _normalize(self): |
|
|
|
@@ -50,55 +86,60 @@ class HoldReport(ABC):
|
|
|
self.df["Source"] = self.source |
|
|
|
self.df["Source"] = self.source |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _previsouly_resolved(self): |
|
|
|
@staticmethod |
|
|
|
|
|
|
|
def _remove_prev_recs(contract_match: DataFrame, no_match: DataFrame) -> \
|
|
|
|
|
|
|
tuple[DataFrame, DataFrame, DataFrame]: |
|
|
|
""" |
|
|
|
""" |
|
|
|
""" |
|
|
|
""" |
|
|
|
current_contracts: list[str] = self.df["contract_number"] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prev_recd: DataFrame = get_prev_reconciled(contracts=current_contracts) |
|
|
|
idents: list[col_hash] = create_identifier(contract_match)["Indentifier"].to_list() |
|
|
|
if not prev_recd: |
|
|
|
idents.extend(create_identifier(no_match)["Indentifier"].to_list()) |
|
|
|
|
|
|
|
logger.debug(f"{idents=}") |
|
|
|
|
|
|
|
# Get previously reconciled rows
|
|
|
|
|
|
|
prev_recs: DataFrame|None = get_prev_reconciled(idents) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if prev_recs is None: |
|
|
|
logger.info("No previously reconciled!") |
|
|
|
logger.info("No previously reconciled!") |
|
|
|
self.df = self._add_work_columns(self.df) |
|
|
|
return DataFrame(), contract_match, no_match |
|
|
|
return |
|
|
|
|
|
|
|
self.prev_rec = prev_recd |
|
|
|
dfs = [] |
|
|
|
|
|
|
|
for df in [contract_match, no_match]: |
|
|
|
start_size = self.df.shape[0] |
|
|
|
start_size = df.shape[0] |
|
|
|
logger.debug(f"Report DF: \n{self.df}") |
|
|
|
logger.debug(f"Report DF: \n{df}") |
|
|
|
logger.debug(f"prev_rec: \n{prev_recd}") |
|
|
|
logger.debug(f"prev_rec: \n{prev_recs}") |
|
|
|
|
|
|
|
|
|
|
|
source_id = f"ID_{self.source}" |
|
|
|
|
|
|
|
self.df[source_id] = self.df["ID"] |
|
|
|
|
|
|
|
self.df = merge( |
|
|
|
|
|
|
|
self.df, |
|
|
|
|
|
|
|
prev_recd, |
|
|
|
|
|
|
|
how="left", |
|
|
|
|
|
|
|
on= source_id, |
|
|
|
|
|
|
|
suffixes=("_cur", "_prev") |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
#self.df.to_excel(f"merged_df_{self.source}.xlsx") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Drop anything that should be ignored |
|
|
|
df = merge( |
|
|
|
self.df = self.df[self.df["Hide Next Month"] != True] |
|
|
|
df, |
|
|
|
logger.info(f"Prev res added:\n{self.df}") |
|
|
|
prev_recs, |
|
|
|
|
|
|
|
how="left", |
|
|
|
col_to_drop = [] |
|
|
|
on= "Indentifier", |
|
|
|
for c in self.df.keys().to_list(): |
|
|
|
suffixes=("_cur", "_prev") |
|
|
|
logger.debug(f"{c=}") |
|
|
|
) |
|
|
|
if "_prev" in c or "ID_" in c: |
|
|
|
|
|
|
|
logger.debug(f"Found '_prev' in {c}") |
|
|
|
df = HoldReport._created_combined_col("HideNextMonth", df, ["prev", "cur"]) |
|
|
|
col_to_drop.append(c) |
|
|
|
df = HoldReport._created_combined_col("Resolution", df, ["prev", "cur"]) |
|
|
|
else: |
|
|
|
df["ID_OB"] = df["ID_OB_cur"] |
|
|
|
logger.debug(f"{c} is a good col!") |
|
|
|
df["ID_GP"] = df["ID_GP_cur"] |
|
|
|
#col_to_drop.extend([c for c in self.df.keys().to_list() if '_prev' in c]) |
|
|
|
|
|
|
|
logger.debug(f"{col_to_drop=}") |
|
|
|
# Drop anything that should be ignored |
|
|
|
self.df.drop( |
|
|
|
df = df[df["HideNextMonth"] != True] |
|
|
|
columns= col_to_drop, |
|
|
|
logger.info(f"Prev res added:\n{df}") |
|
|
|
inplace=True |
|
|
|
|
|
|
|
) |
|
|
|
col_to_drop = [] |
|
|
|
# Restandardize |
|
|
|
for c in df.keys().to_list(): |
|
|
|
self.df.rename(columns={"contract_number_cur": "contract_number"}, inplace=True) |
|
|
|
if "_prev" in c in c or "_cur" in c: |
|
|
|
end_size = self.df.shape[0] |
|
|
|
col_to_drop.append(c) |
|
|
|
logger.info(f"Reduced df by {start_size-end_size}") |
|
|
|
|
|
|
|
|
|
|
|
logger.debug(f"{col_to_drop=}") |
|
|
|
|
|
|
|
df.drop( |
|
|
|
|
|
|
|
columns= col_to_drop, |
|
|
|
|
|
|
|
inplace=True |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
# Restandardize |
|
|
|
|
|
|
|
end_size = df.shape[0] |
|
|
|
|
|
|
|
logger.info(f"Reduced df by {start_size-end_size}") |
|
|
|
|
|
|
|
dfs.append(df) |
|
|
|
|
|
|
|
return prev_recs, dfs[0], dfs[1] |
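
In isolation, the merge-and-prune above looks like this on toy frames; the body of _created_combined_col is not shown in this diff, so the coalescing line is an assumption about its effect (column names, including the "Indentifier" spelling, are as in the diff):

from pandas import DataFrame, merge

cur = DataFrame({"Indentifier": ["a1", "b2"], "Resolution": ["", ""]})
prev = DataFrame({"Indentifier": ["a1"], "Resolution": ["paid"], "HideNextMonth": [True]})

df = merge(cur, prev, how="left", on="Indentifier", suffixes=("_cur", "_prev"))
# Assumed effect of _created_combined_col: prefer the previous run's value.
df["Resolution"] = df["Resolution_prev"].fillna(df["Resolution_cur"])
df = df[df["HideNextMonth"] != True]  # drop rows flagged to hide
df = df.drop(columns=[c for c in df.columns if c.endswith(("_cur", "_prev"))])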
|
|
|
|
|
|
|
|
|
|
|
def _remove_full_matches(self, other: 'HoldReport'): |
|
|
|
def _remove_full_matches(self, other: 'HoldReport'): |
|
|
|
""" |
|
|
|
""" |
|
|
|
@@ -111,7 +152,7 @@ class HoldReport(ABC):
|
|
|
other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))] |
|
|
|
other.df: DataFrame = other.df[~(other.df["ID"].isin(self.df["ID"]))] |
|
|
|
self.df = filter_id_match |
|
|
|
self.df = filter_id_match |
|
|
|
self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True) |
|
|
|
self.combined_missing: DataFrame = concat([self.df, other.df], ignore_index=True) |
|
|
|
self.combined_missing.to_excel("ALL MISSING.xlsx") |
|
|
|
#self.combined_missing.to_excel("ALL MISSING.xlsx") |
|
|
|
logger.debug(f"Combined Missing:\n{self.combined_missing}") |
|
|
|
logger.debug(f"Combined Missing:\n{self.combined_missing}") |
|
|
|
logger.info(f"Payments with errors: {self.combined_missing.shape[0]}") |
|
|
|
logger.info(f"Payments with errors: {self.combined_missing.shape[0]}") |
|
|
|
|
|
|
|
|
|
|
|
@@ -127,7 +168,7 @@ class HoldReport(ABC):
|
|
|
return target_df |
|
|
|
return target_df |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _requires_rec(self, other: 'HoldReport') -> DataFrame: |
|
|
|
def _requires_rec(self, other: 'HoldReport') -> tuple[DataFrame, DataFrame]: |
|
|
|
""" |
|
|
|
""" |
|
|
|
To be run after full matches have been removed.



To be run after full matches have been removed.
|
|
|
""" |
|
|
|
""" |
|
|
|
@@ -140,9 +181,11 @@ class HoldReport(ABC):
|
|
|
suffixes=('_'+self.source, '_'+other.source) |
|
|
|
suffixes=('_'+self.source, '_'+other.source) |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
contract_match = create_identifier(contract_match) |
|
|
|
|
|
|
|
|
|
|
|
#contract_match.to_excel("CONTRACT_MATCH.xlsx") |
|
|
|
#contract_match.to_excel("CONTRACT_MATCH.xlsx") |
|
|
|
|
|
|
|
|
|
|
|
for col in ["vendor_name", "Resolution", "Notes"]: |
|
|
|
for col in ["vendor_name", "HideNextMonth", "Resolution"]: |
|
|
|
self._created_combined_col(col, contract_match, (self.source, other.source)) |
|
|
|
self._created_combined_col(col, contract_match, (self.source, other.source)) |
|
|
|
|
|
|
|
|
|
|
|
logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})") |
|
|
|
logger.debug(f"_requires_rec | contract_match:\n{contract_match.columns} ({contract_match.shape})") |
|
|
|
@@ -159,7 +202,10 @@ class HoldReport(ABC):
|
|
|
row["ID"] if row["Source"] == other.source else None |
|
|
|
row["ID"] if row["Source"] == other.source else None |
|
|
|
, axis=1) |
|
|
|
, axis=1) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
no_match = create_identifier(no_match) |
|
|
|
|
|
|
|
|
|
|
|
logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})") |
|
|
|
logger.debug(f"_requires_rec | no_match:\n{no_match.columns} ({no_match.shape})") |
|
|
|
|
|
|
|
self.prev_recs, contract_match, no_match = self._remove_prev_recs(contract_match, no_match) |
|
|
|
|
|
|
|
|
|
|
|
return contract_match, no_match |
|
|
|
return contract_match, no_match |
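
A compressed sketch of the split this method produces; the real merge keys and suffix handling sit in the elided lines, so the outer-merge indicator here is an illustration, not the code's actual mechanism:

from pandas import DataFrame, merge

ob = DataFrame({"contract_number": ["C1", "C2"], "amount": [10.0, 20.0]})
gp = DataFrame({"contract_number": ["C1", "C3"], "amount": [10.5, 30.0]})

m = merge(ob, gp, on="contract_number", how="outer",
          suffixes=("_OB", "_GP"), indicator=True)
contract_match = m[m["_merge"] == "both"].drop(columns="_merge")  # amounts to compare
no_match = m[m["_merge"] != "both"].drop(columns="_merge")        # one side only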
|
|
|
|
|
|
|
|
|
|
|
@@ -170,25 +216,34 @@ class HoldReport(ABC):
|
|
|
""" |
|
|
|
""" |
|
|
|
logger.debug("Adding work columns!") |
|
|
|
logger.debug("Adding work columns!") |
|
|
|
df_cols: list[str] = df.columns.to_list() |
|
|
|
df_cols: list[str] = df.columns.to_list() |
|
|
|
WORK_COLS = ["Hide Next Month","Resolution"] |
|
|
|
WORK_COLS = ["HideNextMonth","Resolution"] |
|
|
|
for col in WORK_COLS: |
|
|
|
for col in WORK_COLS: |
|
|
|
if col not in df_cols: |
|
|
|
if col not in df_cols: |
|
|
|
df[col] = '' |
|
|
|
df[col] = '' |
|
|
|
return df |
|
|
|
return df |
|
|
|
|
|
|
|
|
|
|
|
def reconcile(self, other: 'HoldReport') -> tuple[DataFrame]: |
|
|
|
def reconcile(self, other: 'HoldReport') -> ReconciledReports: |
|
|
|
""" |
|
|
|
""" |
|
|
|
""" |
|
|
|
""" |
|
|
|
|
|
|
|
assert self.source != other.source, ("Reports to reconcile must be from different sources "



f"({self.source}, {other.source}).")
|
|
|
self._remove_full_matches(other) |
|
|
|
self._remove_full_matches(other) |
|
|
|
all_prev_reced = concat([self.prev_rec, other.prev_rec],ignore_index=True) |
|
|
|
|
|
|
|
|
|
|
|
if self.source == "OB": |
|
|
|
|
|
|
|
over_due: DataFrame = self.overdue |
|
|
|
|
|
|
|
filtered_gp: DataFrame = other.filtered |
|
|
|
|
|
|
|
elif self.source == "GP": |
|
|
|
|
|
|
|
over_due: DataFrame = other.overdue |
|
|
|
|
|
|
|
filtered_gp: DataFrame = self.filtered |
|
|
|
|
|
|
|
|
|
|
|
logger.debug(f"Removed matches:\n{self.df}") |
|
|
|
logger.debug(f"Removed matches:\n{self.df}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
amount_mismatch, no_match = self._requires_rec(other) |
|
|
|
amount_mismatch, no_match = self._requires_rec(other) |
|
|
|
|
|
|
|
|
|
|
|
logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})") |
|
|
|
logger.debug(f"reconcile | no_match unaltered\n{no_match.columns} ({no_match.shape})") |
|
|
|
logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})") |
|
|
|
logger.debug(f"reconcile | am_mm unaltered:\n{amount_mismatch.columns} ({amount_mismatch.shape})") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Formatting |
|
|
|
columns: list[str] = ["ID_GP", "ID_OB"] |
|
|
|
columns: list[str] = ["ID_GP", "ID_OB"] |
|
|
|
columns.extend(self.config["output_columns"]) |
|
|
|
columns.extend(self.config["output_columns"]) |
|
|
|
|
|
|
|
|
|
|
|
@@ -209,19 +264,36 @@ class HoldReport(ABC):
|
|
|
] |
|
|
|
] |
|
|
|
logger.info(f"no_match: {no_match.shape[0]}") |
|
|
|
logger.info(f"no_match: {no_match.shape[0]}") |
|
|
|
logger.info(f"am_mm: {amount_mismatch.shape[0]}") |
|
|
|
logger.info(f"am_mm: {amount_mismatch.shape[0]}") |
|
|
|
return no_match, amount_mismatch |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
reconciled: ReconciledReports = ReconciledReports( |
|
|
|
|
|
|
|
no_match=no_match, |
|
|
|
|
|
|
|
amt_mismatch=amount_mismatch, |
|
|
|
|
|
|
|
prev_rec=self.prev_recs, |
|
|
|
|
|
|
|
gp_filtered=filtered_gp, |
|
|
|
|
|
|
|
ob_overdue=over_due
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
return reconciled |
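
End to end, the classes are meant to be driven like this (a sketch; the file names and config values are hypothetical, and only the "output_columns" and "gp_filters" keys appear in this diff):

from pandas import read_excel
from pathlib import Path

reports_config = {
    "output_columns": [...],
    "gp_filters": {"doc_num_filters": [...], "po_filter": r"..."},
}
ob = OnBaseReport(read_excel("onbase_holds.xlsx"), reports_config)
gp = GreatPlainsReport(read_excel("gp_holds.xlsx"), reports_config)
ob.reconcile(gp).save_reports(Path("reconciled_holds.xlsx"))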
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OnBaseReport(HoldReport): |
|
|
|
class OnBaseReport(HoldReport): |
|
|
|
|
|
|
|
|
|
|
|
source = "OB" |
|
|
|
source = "OB" |
|
|
|
|
|
|
|
|
|
|
|
def get_overdue(self) -> DataFrame: |
|
|
|
def __init__(self, dataframe: DataFrame, reports_config: dict) -> None: |
|
|
|
|
|
|
|
self.overdue = self._get_overdue(dataframe) |
|
|
|
|
|
|
|
super().__init__(dataframe, reports_config) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
|
|
|
|
def _get_overdue(dataframe: DataFrame) -> DataFrame: |
|
|
|
""" |
|
|
|
""" |
|
|
|
""" |
|
|
|
""" |
|
|
|
self.df["InstallDate"] = to_datetime(self.df["InstallDate"]) |
|
|
|
dataframe["InstallDate"] = to_datetime(dataframe["InstallDate"]) |
|
|
|
self.df["InstallDate"].fillna(NaT, inplace=True) |
|
|
|
dataframe["InstallDate"].fillna(NaT, inplace=True) |
|
|
|
return self.df[self.df["InstallDate"].dt.date < datetime.date.today()] |
|
|
|
overdue: DataFrame = dataframe[dataframe["InstallDate"].dt.date\ |
|
|
|
|
|
|
|
< datetime.date.today()] |
|
|
|
|
|
|
|
return overdue |
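
The cutoff comparison is safe for missing dates; a quick check (column name as in the diff):

import datetime
from pandas import DataFrame, to_datetime

df = DataFrame({"InstallDate": ["2020-01-01", None, "2999-01-01"]})
df["InstallDate"] = to_datetime(df["InstallDate"])
overdue = df[df["InstallDate"].dt.date < datetime.date.today()]
# NaT compares False against today's date, so undated rows are not flagged overdue.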
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GreatPlainsReport(HoldReport): |
|
|
|
class GreatPlainsReport(HoldReport): |
|
|
|
@@ -230,7 +302,7 @@ class GreatPlainsReport(HoldReport):
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, dataframe: DataFrame, report_config: dict) -> None: |
|
|
|
def __init__(self, dataframe: DataFrame, report_config: dict) -> None: |
|
|
|
|
|
|
|
|
|
|
|
self._filter( |
|
|
|
self.filtered: DataFrame = self._filter( |
|
|
|
gp_report_df= dataframe, |
|
|
|
gp_report_df= dataframe, |
|
|
|
doc_num_filters= report_config["gp_filters"]["doc_num_filters"], |
|
|
|
doc_num_filters= report_config["gp_filters"]["doc_num_filters"], |
|
|
|
good_po_num_regex= report_config["gp_filters"]["po_filter"] |
|
|
|
good_po_num_regex= report_config["gp_filters"]["po_filter"] |
|
|
|
@@ -239,7 +311,8 @@ class GreatPlainsReport(HoldReport):
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
@staticmethod |
|
|
|
def _filter(gp_report_df: DataFrame, |
|
|
|
def _filter(gp_report_df: DataFrame, |
|
|
|
doc_num_filters: list[str], good_po_num_regex: str) -> DataFrame: |
|
|
|
doc_num_filters: list[str], good_po_num_regex: str |
|
|
|
|
|
|
|
) -> DataFrame: |
|
|
|
|
|
|
|
|
|
|
|
GOOD_PO_NUM = re.compile(good_po_num_regex, re.IGNORECASE) |
|
|
|
GOOD_PO_NUM = re.compile(good_po_num_regex, re.IGNORECASE) |
|
|
|
|
|
|
|
|
|
|
|
@@ -257,15 +330,15 @@ class GreatPlainsReport(HoldReport):
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
# Get the rows that DO NOT fit the keep_mask |
|
|
|
# Get the rows that DO NOT fit the keep_mask |
|
|
|
rows_to_drop = gp_report_df[~keep_mask].index |
|
|
|
dropped_positives: DataFrame = gp_report_df[~keep_mask]
|
|
|
# Drop the rows to filter |
|
|
|
# Drop the rows to filter |
|
|
|
gp_report_df.drop(rows_to_drop, inplace=True) |
|
|
|
gp_report_df.drop(dropped_positives.index, inplace=True)
|
|
|
|
|
|
|
|
|
|
|
# Create a filter to remove rows that meet this requirement



# Create a filter to remove rows that meet this requirement
|
|
|
# Making this a negative in the keep mask is more trouble than |
|
|
|
# Making this a negative in the keep mask is more trouble than |
|
|
|
# it's worth |
|
|
|
# it's worth |
|
|
|
remove_mask = gp_report_df["Document Number"].str.contains(bad_doc_num) |
|
|
|
remove_mask = gp_report_df["Document Number"].str.contains(bad_doc_num) |
|
|
|
rows_to_drop = gp_report_df[remove_mask].index |
|
|
|
dropped_negatives: DataFrame = gp_report_df[remove_mask] |
|
|
|
gp_report_df.drop(rows_to_drop, inplace=True) |
|
|
|
gp_report_df.drop(dropped_negatives.index, inplace=True) |
|
|
|
|
|
|
|
|
|
|
|
return gp_report_df |
|
|
|
return concat([dropped_positives, dropped_negatives], ignore_index=False)
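
The two-pass masking in _filter, on toy data (regexes and column values simplified; the real patterns come from report_config):

from pandas import DataFrame, concat

df = DataFrame({"PO Number": ["PO-123", "misc"],
                "Document Number": ["INV-1", "CHG-9"]})
keep_mask = df["PO Number"].str.match(r"po-\d+", case=False)
dropped_positives = df[~keep_mask]           # rows failing the PO pattern
df = df[keep_mask]
remove_mask = df["Document Number"].str.contains("CHG")
dropped_negatives = df[remove_mask]          # rows matching a bad doc number
df = df[~remove_mask]
filtered_out = concat([dropped_positives, dropped_negatives], ignore_index=False)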