from pandas import DataFrame, merge, to_datetime, NaT
from numpy import concatenate
from abc import ABC, abstractmethod
from logging import getLogger
import re
from typing import Literal
import datetime

from helpers import CN_REGEX

logger = getLogger(__name__)


class HoldReport(ABC):
    """Base class for an on-hold transactions report from one data source.

    Subclasses set ``source`` to a short tag (e.g. ``"OB"``, ``"GP"``) used
    to label rows and to disambiguate columns when two reports are merged
    during reconciliation.
    """

    # Short tag identifying the data source; overridden by subclasses.
    source = ""

    def __init__(self, dataframe: DataFrame, reports_config: dict) -> None:
        """Store the report data and normalize it to the shared schema.

        Args:
            dataframe: Raw report data for this source (mutated in place).
            reports_config: Must contain ``"shared_columns"`` (mapping of
                common column name -> {source tag: source-specific column
                name}) and ``"output_columns"`` (columns kept in the
                reconciliation output).
        """
        self.config = reports_config
        self.df = dataframe
        self._normalize()

    def _normalize(self) -> None:
        """Standardize columns and add ID/Source columns, in place."""
        # Rename source-specific columns to the shared/common names.
        self.df.rename(
            columns={
                unique_cols[self.source]: common_col
                for common_col, unique_cols in self.config["shared_columns"].items()
            },
            inplace=True,
        )
        # Normalize the on-hold amount to a float rounded to cents.
        self.df["onhold_amount"] = self.df["onhold_amount"].astype(float).round(2)

        # Extract the standardized contract number where the pattern matches;
        # otherwise keep the raw value (stringified) unchanged.
        def _extract_cn(cn) -> str:
            match = re.search(CN_REGEX, str(cn))
            return match.group(0) if match else str(cn)

        self.df["contract_number"] = self.df["contract_number"].apply(_extract_cn)
        # Unique transaction ID: contract number + amount.
        self.df["ID"] = (
            self.df["contract_number"] + "_" + self.df["onhold_amount"].astype(str)
        )
        # Tag every row with its originating source.
        self.df["Source"] = self.source

    def _get_no_match(self, other: "HoldReport") -> DataFrame:
        """Return transactions present in only one of the two reports.

        Rows are labelled with the source they came from, and the amount and
        vendor columns are coalesced into single output columns.
        """
        # Outer merge keeps rows from both sides; suffixes mark the origin.
        outer_merge = merge(
            self.df,
            other.df,
            how="outer",
            on=["contract_number"],
            suffixes=("_" + self.source, "_" + other.source),
        )
        # Keep only rows missing from one side or the other.
        # .copy() avoids SettingWithCopyWarning (and potential silent
        # non-assignment) on the column assignments below.
        no_match = outer_merge.loc[
            (outer_merge[f"Source_{self.source}"].isna())
            | (outer_merge[f"Source_{other.source}"].isna())
        ].copy()
        # Coalesce per-source columns into single output columns.
        # BUG FIX: the fallback source tag was hard-coded to "GP"; use the
        # other report's tag so labelling is correct for any pairing.
        no_match["Source"] = no_match[f"Source_{self.source}"].fillna(other.source)
        no_match["onhold_amount"] = no_match[f"onhold_amount_{self.source}"].fillna(
            no_match[f"onhold_amount_{other.source}"]
        )
        no_match["vendor_name"] = no_match[f"vendor_name_{self.source}"].fillna(
            no_match[f"vendor_name_{other.source}"]
        )
        return no_match

    def _get_contract_matches(self, other: "HoldReport") -> DataFrame:
        """Return transactions whose contract number appears in both reports."""
        contract_match = merge(
            self.df,
            other.df,
            how="inner",
            on=["contract_number"],
            suffixes=("_" + self.source, "_" + other.source),
        )
        # Coalesce the vendor name from whichever side has it.
        contract_match["vendor_name"] = contract_match[
            f"vendor_name_{self.source}"
        ].fillna(contract_match[f"vendor_name_{other.source}"])
        return contract_match

    @staticmethod
    def _add_work_columns(df: DataFrame) -> DataFrame:
        """Add empty Resolution/Notes columns used while working the report."""
        for col in ("Resolution", "Notes"):
            df[col] = ""
        return df

    def reconcile(self, other: "HoldReport") -> tuple[DataFrame, DataFrame]:
        """Reconcile this report against *other*.

        Returns:
            ``(no_match, amount_mismatch)``: rows present in only one report,
            and rows whose contract numbers matched in both reports; both with
            empty Resolution/Notes work columns appended.
        """
        no_match: DataFrame = self._get_no_match(other)
        no_match.to_excel("NOMATCH.xlsx")  # TODO(review): leftover debug dump?
        logger.debug("No_match: %s", no_match)

        amount_mismatch: DataFrame = self._get_contract_matches(other)
        amount_mismatch.to_excel("AMTMM.xlsx")  # TODO(review): leftover debug dump?
        # BUG FIX: the original logged no_match here instead of amount_mismatch.
        logger.debug("amount_mismatch: %s", amount_mismatch)

        # Select and reorder the output columns.
        # BUG FIX: the original used ["Source"].extend(...), which returns
        # None and made the selection raise; use list concatenation instead.
        no_match = no_match[["Source"] + self.config["output_columns"]]
        no_match = self._add_work_columns(no_match)

        amount_mismatch = amount_mismatch[self.config["output_columns"]]
        amount_mismatch = self._add_work_columns(amount_mismatch)
        return no_match, amount_mismatch


class OnBaseReport(HoldReport):
    """Hold report sourced from OnBase."""

    source = "OB"

    def get_overdue(self) -> DataFrame:
        """Return rows whose install date is strictly before today.

        Rows with unparseable/missing install dates (NaT) are excluded,
        since NaT comparisons evaluate to False.
        """
        self.df["install_date"] = to_datetime(self.df["install_date"])
        # to_datetime already yields NaT for missing values; this fillna is a
        # no-op safeguard retained from the original implementation.
        self.df["install_date"].fillna(NaT, inplace=True)
        return self.df[self.df["install_date"].dt.date < datetime.date.today()]


class GreatPlainsReport(HoldReport):
    """Hold report sourced from Great Plains, pre-filtered on load."""

    source = "GP"
    # NOTE(review): appears unused (name looks like a typo of "filtered_df");
    # kept for backward compatibility.
    filted_df: bool = False

    def __init__(self, dataframe: DataFrame, report_config: dict) -> None:
        """Filter the raw report in place, then normalize it.

        Args:
            dataframe: Raw Great Plains report (mutated in place).
            report_config: Must additionally contain ``"gp_filters"`` with
                ``"doc_num_filters"`` and ``"po_filter"`` entries.
        """
        self._filter(
            gp_report_df=dataframe,
            doc_num_filters=report_config["gp_filters"]["doc_num_filters"],
            good_po_num_regex=report_config["gp_filters"]["po_filter"],
        )
        super().__init__(dataframe, report_config)

    @staticmethod
    def _filter(
        gp_report_df: DataFrame,
        doc_num_filters: list[str],
        good_po_num_regex: str,
    ) -> DataFrame:
        """Drop rows that are not relevant invoices, in place.

        Keeps only rows that are invoices whose PO number matches
        *good_po_num_regex*, then removes rows whose document number matches
        any pattern in *doc_num_filters*.

        Args:
            gp_report_df: Raw Great Plains report; mutated in place.
            doc_num_filters: Regex patterns for document numbers to drop.
            good_po_num_regex: Regex a PO number must match to be kept.

        Returns:
            The same (mutated) DataFrame, for convenience.
        """
        good_po_num = re.compile(good_po_num_regex, re.IGNORECASE)
        # OR all the document-number patterns into a single regex.
        bad_doc_num = re.compile(
            "|".join(f"({rx})" for rx in doc_num_filters), re.IGNORECASE
        )
        # Keep only invoice rows with an acceptable PO number.
        keep_mask = (
            (gp_report_df["Document Type"] == "Invoice")
            & (gp_report_df["Purchase Order Number"].str.contains(good_po_num))
        )
        gp_report_df.drop(gp_report_df[~keep_mask].index, inplace=True)
        # Then drop rows whose document number matches any unwanted pattern.
        # (Folding this negation into keep_mask is harder to read.)
        remove_mask = gp_report_df["Document Number"].str.contains(bad_doc_num)
        gp_report_df.drop(gp_report_df[remove_mask].index, inplace=True)
        return gp_report_df