"""Fixed-width report extractors for the Net Investment trial balances.

This module holds the code of the two scripts named in the original change
set, ``NI_sum.py`` and ``RenewalTest.py``.  Both scripts shared an identical
``create_line_divider`` helper, so it is defined once here.

NOTE(review): the block nesting below was reconstructed from a source whose
line breaks and indentation had been collapsed.  The places where more than
one nesting was syntactically possible are marked with NOTE(review) comments
and should be confirmed against the original scripts.
"""
import getopt
import os
import re
import sys
import time
from datetime import datetime as dt, timedelta
from pathlib import Path
from pprint import pprint as prt

import numpy as np
import pandas as pd

# Raw string so that ``\d`` reaches the regex engine untouched instead of
# being treated as an (invalid) string escape.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"


def create_line_divider(breakage_list: list):
    """Build a slot extractor for fixed-width text lines.

    ``breakage_list`` holds the character offsets at which a line is split.
    With ``[10, 20, 30]`` slot 0 is characters 0-10, slot 1 is characters
    10-20, slot 2 is characters 20-30 and slot 3 is everything from
    character 30 to the end of the line.

    Returns:
        A function ``extract_line_slot(slot_num, line_string, debug=False)``.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """Return the content of one slot of ``line_string``.

        The slot text is stripped and its commas are removed.  If the result
        parses as a float the float is returned, otherwise the cleaned string
        is returned (an empty or missing slot yields ``""``).

        Only obtain this function through ``create_line_divider``; the split
        points come from the enclosing call.

        Raises:
            AssertionError: if ``slot_num`` is greater than the number of
                split points (i.e. past the trailing slot).
        """
        assert slot_num < len(breakage_list) + 1, "slot_num out of range"
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        if slot_num == len(breakage_list):
            # The last slot has no right-hand split point: run to end of line.
            high_range = len(line_string)
        else:
            high_range = breakage_list[slot_num]
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(data)
        except ValueError:
            # Not numeric: keep the cleaned text.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data
    return extract_line_slot


def _append_record(target: dict, layouts, data_section, extractor) -> None:
    """Append one record to ``target``.

    ``layouts[i]`` is a list of ``(column, slot)`` pairs that is applied to
    ``data_section[i]``; each extracted value is appended to
    ``target[column]``.
    """
    for row, layout in enumerate(layouts):
        section_line = data_section[row]
        for column, slot in layout:
            target[column].append(extractor(slot, section_line, False))


def net_invest_trial_balance(report: str, save_name: str):
    """Extract contract records from a net investment report and save them.

    A record is located by finding a line whose first slot contains a
    contract number (``contract_number_regex``).  The record is read from the
    line before it, the line itself and the two lines after it; lines of that
    window that contain no ``"."`` are dropped and replaced as described in
    the loop below.

    The result is written to the Excel workbook ``save_name`` with two
    sheets: ``data`` (one row per contract) and ``Summary`` (per-lessor
    column sums plus a ``CONTRACT COUNT``).  The lessor is the first three
    characters of the lease number.

    Progress and diagnostic information is printed to stdout.

    Args:
        report: Full text of the report.
        save_name: Path of the Excel workbook to write.

    Returns:
        The per-contract ``pandas.DataFrame``.
    """
    lines = report.splitlines()
    columns = [
        'CUSTOMER NAME',
        'CURR INT RCVB',
        'UNEARNED BLENDED',
        'BLEND NET INV',
        'LEASE NUMBER',
        'GROSS CONTRACT',
        'CURR RENT RCVB',
        'UNEARN FIN',
        'END DEPOSIT',
        'SEC DEPOSIT',
        'LEASE PYMTS',
        'TOTAL',
        'CONTRACT STAT',
        'PAYMENTS RCVD',
        'REM RENT RCVB',
        'UNEARN RESID',
        'PROV LOSS',
        'NET RESERVE',
        'UNEARN INC',
        'BAL REMAINING',
        'RESIDUAL',
        'UNPAID INT',
        'NET INV',
        'UNEARNED IDC',
        "LESSOR",
    ]
    extracted_data_dict = {column: [] for column in columns}
    lessors = []

    # (column, slot) pairs for each of the four lines that make up a record.
    line0 = list(zip(columns[0:4], [0, 3, 4, 5]))
    line1 = list(zip(columns[4:12], range(0, 8)))
    line2 = list(zip(columns[12:19], range(0, 7)))
    line3 = list(zip(columns[19:-1], range(1, 6)))
    layouts = (line0, line1, line2, line3)

    for layout in layouts:
        print(f"\n{layout}")

    data_extractor = create_line_divider([18, 32, 50, 66, 84, 100, 117])
    for index, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        if not isinstance(slot1, str):
            # A purely numeric first slot cannot be a contract number.
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue

        data_section = lines[index - 1:index + 3]

        # If the line above the contract line has no ".", use the line
        # above that one instead.
        if "." not in data_section[0]:
            data_section[0] = lines[index - 2]

        # NOTE(review): reconstructed nesting - both branches of the original
        # if/else ended by pulling in lines[index + 3] as the last entry, so
        # they are merged here.  Confirm against the original script.
        for pos, section_line in enumerate(data_section):
            if "." in section_line:
                continue
            last = len(data_section) - 1
            if pos < last:
                # Drop the line at ``pos`` by shifting the rest up by one.
                data_section[pos:last] = data_section[pos + 1:last + 1]
            data_section[3] = lines[index + 3]

        _append_record(extracted_data_dict, layouts, data_section, data_extractor)

        lessor = extracted_data_dict["LEASE NUMBER"][-1][0:3]
        extracted_data_dict["LESSOR"].append(lessor)
        if lessor not in lessors:
            print(lessor)
            lessors.append(lessor)
            # NOTE(review): assumed to be inside the ``if``; confirm.
            print(lessors)

    for column in columns:
        print(f"C: {column} | {len(extracted_data_dict[column])}")
    print(lessors)
    dataframe = pd.DataFrame(extracted_data_dict)

    summary_series = []
    for lessor in lessors:
        reduced_df = dataframe.loc[dataframe["LESSOR"] == lessor].drop(
            columns=["CUSTOMER NAME", "LEASE NUMBER", "CONTRACT STAT"]
        )
        # Blank cells and these two text fragments are treated as missing so
        # they are skipped by the sum.  ``np.nan`` is used because the
        # ``np.NaN`` alias no longer exists in NumPy 2.0.
        for placeholder in ("", "REVOLV", "ING ACCOUNT"):
            reduced_df = reduced_df.replace(placeholder, np.nan)
        summation = reduced_df.sum(skipna=True, axis=0)
        summation["LESSOR"] = lessor
        summation["CONTRACT COUNT"] = len(reduced_df.index)
        summary_series.append(summation)
    summary_df = pd.concat(summary_series, axis=1).transpose().set_index("LESSOR")
    prt(summary_df)

    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        pd.DataFrame(summary_df).to_excel(writer, index=True, sheet_name="Summary")
    return dataframe


def renewal_net_invest_trial_balance(report: str, save_name: str):
    """Extract contract records from a renewal net investment report.

    A record is located by finding a line whose first slot contains a
    contract number (``contract_number_regex``).  The record is read from the
    line before it, the line itself and the line after it.  The result is
    written to the Excel file ``save_name``.

    Every line of every record is printed to stdout; a record containing a
    line without ``"."`` is additionally dumped in numbered form.

    Args:
        report: Full text of the report.
        save_name: Path of the Excel file to write.

    Returns:
        The per-contract ``pandas.DataFrame``.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([21, 29, 43, 58, 71, 88, 99, 113])
    columns = [
        'CUSTOMER NAME',
        'TYPE',
        'GROSS RENEWAL',
        'REMAINING BAL',
        'FINANCED RES',
        'REMAINING RES',
        'LEASE PYMTS',
        'CONTRACT NUMBER',
        'RENEWAL',
        'PAYMENTS RCVD',
        'CUR RENT RCVB',
        'UNEARNED RIN',
        'SECURITY DEP',
        'NET INVEST',
        'UNEARN INCOME',
        'TOTAL',
        'REM RENT RCVB',
        'UNPAID RES',
    ]
    extracted_data_dict = {column: [] for column in columns}

    # (column, slot) pairs for each of the three lines that make up a record.
    line0 = list(zip(columns[0:7], [0, 1, 2, 3, 4, 5, 7]))
    line1 = list(zip(columns[7:16], range(0, 9)))
    line2 = list(zip(columns[16:], [3, 4]))
    layouts = (line0, line1, line2)

    for index, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue

        data_section = lines[index - 1:index + 2]

        for section_line in data_section:
            print(section_line)
            if "." not in section_line:
                for pos, dumped in enumerate(data_section):
                    print(f"\n{pos}: {dumped}")
                # NOTE(review): assumed to be inside the ``if``; confirm.
                print('\n')

        _append_record(extracted_data_dict, layouts, data_section, data_extractor)

    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe


def main():
    """Run both extractors on the report files used by the original scripts."""
    with open("/config/workspace/LEAF/IL Extract SRC/2022.05.20 Net Investment", errors="replace") as rep_file:
        report = rep_file.read()
    prt(net_invest_trial_balance(report, "520_NI_TEST.xlsx"))

    with open("/config/workspace/LEAF/IL Extract SRC/2022.05.20 Renewal Net Investment", errors="replace") as rep_file:
        report = rep_file.read()
    prt(renewal_net_invest_trial_balance(report, "rn_TESTING.xlsx"))


# Guarded so that importing this module does not try to open the report files.
if __name__ == "__main__":
    main()