# InfoLeaseExtract/ach_special.py

import os
import pandas as pd
from datetime import datetime as dt, timedelta
import sys, getopt
import re
from pathlib import Path
import time
from pprint import pprint as prt

# Pattern for a contract number: 3 digits, a dash, 7 digits, a dash, 3 digits.
# Written as a raw string so the backslashes reach the regex engine untouched
# (a plain "\d" is an invalid string escape and triggers a warning).
contract_number_regex = r"\d{3}-\d{7}-\d{3}"

def create_line_divider(breakage_list: list):
    """
    Build a custom fixed-width data extractor for a line of text.

    breakage_list defines the character offsets at which a line is split.
    N break points produce N + 1 slots:
        slot 0 -> characters 0 up to breakage_list[0]
        slot k -> characters breakage_list[k-1] up to breakage_list[k]
        slot N -> characters breakage_list[-1] up to the end of the line

    Example
    Given breakage_list [10, 20, 30]
    using slot_num 0 in the resulting extract_line_slot will yield
    characters 0 - 10 from the string.
    Slot 1 would give characters 10 - 20

    Returns the extract_line_slot function bound to breakage_list.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pulls data from a line/string using break points defined by the
        parent function.
        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slice is stripped of surrounding whitespace and commas are
        removed. If the remaining text is accepted by float() it is returned
        as a float, otherwise it is returned as a string.
        NOTE: float() also accepts text such as "nan" or "inf", so a slot
        holding exactly that text comes back as a float.

        slot_num: index of the slot to read (0 .. len(breakage_list))
        line_string: the line to cut the slot from
        debug: when True, print the slot boundaries and the extracted value
        Raises AssertionError when slot_num is past the last slot.
        """
        slot_count = len(breakage_list) + 1
        assert(slot_num < slot_count)
        # The first slot starts at column 0; every other slot starts at the
        # previous break point.
        low_range = 0 if slot_num == 0 else breakage_list[slot_num-1]
        # The last slot runs to the end of the line.
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not numeric text: keep the cleaned string as-is.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot


def ach(report: str, save_name: str):
    """
    Parse the text of an ACH report into a DataFrame, one row per payment line.

    A line is treated as a payment row when it contains both a contract
    number (contract_number_regex) and a run of nine digits. The first six
    columns are cut from that line at fixed character offsets.
    Batch, Lessor and PaymentDate are not on the payment line itself: they
    are filled in for every row still waiting for them each time a
    "BATCH nnnn TOTAL" line is reached.

    report: full text of the report
    save_name: currently unused by this function; kept so existing callers
        keep working
    Returns a pandas DataFrame with the columns ContractNumber, CustomerName,
    BankCode, BankNumber, AccountNumber, Payment, Batch, Lessor, PaymentDate.
    Raises IndexError when a batch total line appears before any payment row,
    or when the lines expected 2 and 6 lines below it do not exist.
    Raises ValueError when the batch total / count text is not numeric, or
    when payment rows appear after the last batch total line (they would
    have no batch information).
    """
    lines = report.splitlines()

    # Columns read straight from a payment line, in slot order.
    payment_columns = (
        "ContractNumber",
        "CustomerName",
        "BankCode",
        "BankNumber",
        "AccountNumber",
        "Payment",
    )
    # Columns back-filled from the batch total section.
    batch_columns = ("Batch", "Lessor", "PaymentDate")
    extracted_data_dict = {name: [] for name in payment_columns + batch_columns}

    # Per-batch values. Only the most recent entry of batch_num, lessor and
    # payment_date is used below; count and total are collected but are not
    # part of the returned DataFrame.
    batches = {
        "batch_num": [],
        "payment_date": [],
        "lessor": [],
        "count": [],
        "total": []
    }

    data_extractor = create_line_divider([19, 57, 67, 82, 104])
    # Compile once, outside the loop; raw strings keep the backslashes intact.
    contract_number_pattern = re.compile(contract_number_regex)
    bank_number_pattern = re.compile(r"\d{9}")
    batch_total_pattern = re.compile(r"BATCH \d{4} TOTAL")

    for index, line in enumerate(lines):
        if contract_number_pattern.search(line) and bank_number_pattern.search(line):
            for slot, column in enumerate(payment_columns):
                extracted_data_dict[column].append(data_extractor(slot, line))

        # Deliberately a separate 'if': a line is checked for both roles.
        if batch_total_pattern.search(line):
            batches["batch_num"].append(line[96:101])
            # The payment date is read from two lines below the total line.
            batches["payment_date"].append(lines[index + 2][114:125])
            # Lessor is the first three characters of the latest contract number.
            batches["lessor"].append(extracted_data_dict["ContractNumber"][-1][0:3])
            batches["total"].append(float(line[107:125].strip().replace(",", "")))
            # The count is read from six lines below the total line.
            batches["count"].append(float(lines[index + 6][107:125].strip().replace(",", "")))

            # Back-fill the batch columns for every row added since the
            # previous batch total line.
            pending = len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])
            extracted_data_dict["Batch"].extend([batches["batch_num"][-1]] * pending)
            extracted_data_dict["Lessor"].extend([batches["lessor"][-1]] * pending)
            extracted_data_dict["PaymentDate"].extend([batches["payment_date"][-1]] * pending)

    # Rows after the last batch total line never received batch data, which
    # leaves the columns at different lengths. pandas would reject that with
    # a generic ValueError; raise the same exception type with a clear cause.
    unassigned = len(extracted_data_dict["BankCode"]) - len(extracted_data_dict["Batch"])
    if unassigned:
        raise ValueError(
            f"{unassigned} payment row(s) found after the last batch total line; "
            "cannot assign Batch, Lessor and PaymentDate"
        )

    dataframe = pd.DataFrame(extracted_data_dict)

    return dataframe

# Ad-hoc smoke run: read one report from a fixed path, parse it with ach()
# and pretty-print the resulting DataFrame.
# NOTE: there is no __main__ guard, so this also executes when the module is
# imported.
report_path = "/config/workspace/LEAF/IL Extract SRC/2022.05.04_ACH_C"
with open(report_path) as rep_file:
    report = rep_file.read()

ach_frame = ach(report, "ACH_TESTING.xlsx")
prt(ach_frame)