You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
152 lines
5.8 KiB
152 lines
5.8 KiB
import os
|
|
import pandas as pd
|
|
from datetime import datetime as dt, timedelta
|
|
import sys, getopt
|
|
import re
|
|
from pathlib import Path
|
|
import time
|
|
from pprint import pprint as prt
|
|
import numpy as np
|
|
|
|
# Pattern used to recognise a contract number in a report line:
# 3 digits, 7 digits and 3 digits separated by dashes.
# Raw string so that "\d" reaches the regex engine as-is instead of being
# treated as an (invalid) string escape sequence.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
|
|
|
|
def create_line_divider(breakage_list: list):
    """
    Build a custom extractor that slices fixed-width fields out of a line.

    ``breakage_list`` defines the split points (character offsets) used for
    the line. N split points define N + 1 slots: slot 0 runs from the start
    of the line to the first split point, slot ``k`` runs from split point
    ``k - 1`` to split point ``k``, and the last slot runs from the final
    split point to the end of the line.

    Example
        Given breakage_list [10, 20, 30], slot 0 of the resulting
        extractor yields characters 0 - 10 of the string, slot 1 yields
        characters 10 - 20 and slot 3 yields characters 30 to the end.

    Returns the ``extract_line_slot`` closure bound to ``breakage_list``.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pull one slot out of ``line_string`` using the enclosing split points.

        Only obtain this function through ``create_line_divider``; it relies
        on the ``breakage_list`` captured from that call.

        The slice is stripped of surrounding whitespace and has its commas
        removed. If the remaining text parses as a float it is returned as a
        float, otherwise the cleaned string is returned (an empty slot gives
        ``""``). When ``debug`` is true the slot bounds and value are printed.

        Raises AssertionError when ``slot_num`` is greater than the number
        of split points.
        """
        assert slot_num < len(breakage_list) + 1, (
            f"slot_num {slot_num} is out of range for {len(breakage_list) + 1} slots"
        )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        high_range = len(line_string) if slot_num == len(breakage_list) else breakage_list[slot_num]
        text = line_string[low_range:high_range].strip().replace(",", "")
        try:
            data = float(text)
        except ValueError:
            # Not numeric: keep the cleaned text. Only ValueError is caught so
            # that unrelated failures (e.g. KeyboardInterrupt) still propagate.
            data = text
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
|
|
|
|
|
|
def net_invest_trial_balance(report: str, save_name: str):
    """
    Parse a fixed-width net investment report and export it to Excel.

    The report is scanned line by line. A line whose first slot contains a
    contract number (``contract_number_regex``) anchors one record made of
    four lines: the line before it, the contract line itself and the two
    lines after it. Each of the four lines is cut into slots with the
    extractor from ``create_line_divider`` and the slots are appended to the
    matching columns. The LESSOR column is the first three characters of the
    record's LEASE NUMBER value.

    A per-lessor summary (column sums plus a CONTRACT COUNT) is built and
    both tables are written to ``save_name`` as sheets "data" and "Summary".
    Progress/diagnostic information is printed along the way.

    Parameters
        report: full text of the report.
        save_name: path of the Excel workbook to write.

    Returns
        The per-contract DataFrame (the content of the "data" sheet).
    """
    lines = report.splitlines()
    extracted_data_dict = {
        'CUSTOMER NAME': [],
        'CURR INT RCVB': [],
        'UNEARNED BLENDED': [],
        'BLEND NET INV': [],
        'LEASE NUMBER': [],
        'GROSS CONTRACT': [],
        'CURR RENT RCVB': [],
        'UNEARN FIN': [],
        'END DEPOSIT': [],
        'SEC DEPOSIT': [],
        'LEASE PYMTS': [],
        'TOTAL': [],
        'CONTRACT STAT': [],
        'PAYMENTS RCVD': [],
        'REM RENT RCVB': [],
        'UNEARN RESID': [],
        'PROV LOSS': [],
        'NET RESERVE': [],
        'UNEARN INC': [],
        'BAL REMAINING': [],
        'RESIDUAL': [],
        'UNPAID INT': [],
        'NET INV': [],
        'UNEARNED IDC': [],
        "LESSOR": [],
    }
    lessors = []
    columns = list(extracted_data_dict.keys())

    # (column name, slot number) pairs for each of the four lines of a record.
    # The last column (LESSOR) is derived, not read from a slot.
    line0 = list(zip(columns[0:4], [0, 3, 4, 5]))
    line1 = list(zip(columns[4:12], range(0, 8)))
    line2 = list(zip(columns[12:19], range(0, 7)))
    line3 = list(zip(columns[19:-1], range(1, 6)))
    line_layouts = [line0, line1, line2, line3]

    for layout in line_layouts:
        print(f"\n{layout}")

    data_extractor = create_line_divider([18, 32, 50, 66, 84, 100, 117])
    for index, line in enumerate(lines):
        slot1 = data_extractor(0, line, False)
        # The extractor returns a float for numeric slots; those cannot hold
        # a contract number.
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue

        # Line before the contract line, the contract line, and two after.
        data_section = lines[index - 1:index + 3]

        # NOTE(review): a line without "." is treated as carrying no data,
        # presumably because every data line holds a decimal amount - confirm.
        if data_section[0].find(".") == -1:
            data_section[0] = lines[index - 2]
        for position, text in enumerate(data_section):
            if text.find(".") == -1:
                # Drop the line by shifting the following ones up, then pull
                # the next report line into the last position.
                # NOTE(review): the source's indentation was lost; both
                # branches of the original if/else assigned the same value
                # to data_section[3], so they are merged here - confirm.
                for i in range(position, len(data_section) - 1):
                    data_section[i] = data_section[i + 1]
                data_section[3] = lines[index + 3]

        # Indexed access (rather than zip) so a short section still fails
        # loudly instead of producing columns of unequal length.
        for layout_number, layout in enumerate(line_layouts):
            section_line = data_section[layout_number]
            for column, slot in layout:
                extracted_data_dict[column].append(
                    data_extractor(slot, section_line, False)
                )

        lessor = extracted_data_dict["LEASE NUMBER"][-1][0:3]
        extracted_data_dict["LESSOR"].append(lessor)
        if lessor not in lessors:
            print(lessor)
            lessors.append(lessor)
            # NOTE(review): placement of this print could not be recovered
            # from the source; kept inside the new-lessor branch - confirm.
            print(lessors)

    for c in columns:
        print(f"C: {c} | {len(extracted_data_dict[c])}")
    print(lessors)
    dataframe = pd.DataFrame(extracted_data_dict)

    summary_series = []
    for lessor in lessors:
        reduced_df = dataframe.loc[dataframe["LESSOR"] == lessor]
        reduced_df = reduced_df.drop(
            columns=["CUSTOMER NAME", "LEASE NUMBER", "CONTRACT STAT"]
        )
        # np.nan rather than np.NaN: the capitalised alias was removed in
        # NumPy 2.0. Blank and known text fragments are excluded from sums.
        reduced_df = reduced_df.replace("", np.nan)
        reduced_df = reduced_df.replace("REVOLV", np.nan)
        reduced_df = reduced_df.replace("ING ACCOUNT", np.nan)
        summation = reduced_df.sum(skipna=True, axis=0)
        summation["LESSOR"] = lessor
        summation["CONTRACT COUNT"] = len(reduced_df.index)
        summary_series.append(summation)

    # One row per lessor, indexed by the lessor code.
    summary_df = pd.concat(summary_series, axis=1).transpose().set_index("LESSOR")
    prt(summary_df)
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        pd.DataFrame(summary_df).to_excel(writer, index=True, sheet_name="Summary")
    return dataframe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the whole report as text; errors="replace" substitutes undecodable
# bytes instead of raising.
report = Path(
    "/config/workspace/LEAF/IL Extract SRC/2022.05.20 Net Investment"
).read_text(errors="replace")

# Parse the report (this also writes the workbook) and pretty-print the result.
prt(net_invest_trial_balance(report, "520_NI_TEST.xlsx"))