parent
d690c75399
commit
3baea9331e
@ -0,0 +1,152 @@ |
||||
import os |
||||
import pandas as pd |
||||
from datetime import datetime as dt, timedelta |
||||
import sys, getopt |
||||
import re |
||||
from pathlib import Path |
||||
import time |
||||
from pprint import pprint as prt |
||||
import numpy as np |
||||
|
||||
# Pattern for a contract/lease number: 3 digits, 7 digits, 3 digits, dash-separated.
# Raw string so that "\d" reaches the regex engine instead of being parsed as a
# (deprecated, invalid) string escape sequence.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
||||
|
||||
def create_line_divider(breakage_list: list):
    """
    Build an extractor that slices fixed-width lines at the given break points.

    breakage_list holds the character offsets at which a line is split.
    N break points define N + 1 slots: slot 0 runs from the start of the
    line to the first break point, slot k runs from break point k-1 to
    break point k, and the last slot runs to the end of the line.

    Example
        Given breakage_list [10, 20, 30]
        slot 0 yields characters 0 - 10 of the string,
        slot 1 yields characters 10 - 20,
        slot 3 yields everything from character 30 onward.

    Returns the extract_line_slot function bound to breakage_list.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pull one slot out of line_string using the break points captured
        from create_line_divider.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slice is stripped of surrounding whitespace and of thousands
        separators (","). If what remains parses as a float it is returned
        as a float; otherwise the cleaned string is returned.

        Raises IndexError when slot_num is greater than the number of
        break points.
        """
        # Explicit check instead of `assert`, which disappears under `python -O`.
        if slot_num > len(breakage_list):
            raise IndexError(
                f"slot_num {slot_num} out of range for {len(breakage_list) + 1} slots"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        if slot_num == len(breakage_list):
            high_range = len(line_string)
        else:
            high_range = breakage_list[slot_num]
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            # NOTE: float() also accepts text such as "nan" or "inf"; a cell
            # holding exactly that text comes back as a float.
            data = float(data)
        except ValueError:
            # Not numeric: keep the cleaned text.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
||||
|
||||
|
||||
def net_invest_trial_balance(report: str, save_name: str):
    """
    Extract per-contract figures from a fixed-width report and save them
    to an Excel workbook.

    A line whose first slot (characters 0 - 18) contains a contract number
    marks a record. The record is read from a window of four lines that
    starts one line above the match; window lines without a "." are
    treated as carrying no figures and are replaced by the lines that
    follow (presumably wrapped names or blank lines - confirm against a
    sample report).

    Parameters
        report: full text of the report.
        save_name: path of the workbook to write. Sheet "data" holds one
            row per contract, sheet "Summary" holds per-lessor column
            totals plus a contract count.

    Returns the per-contract DataFrame.

    Raises IndexError if a record's window runs past the available lines.
    """
    lines = report.splitlines()
    extracted_data_dict = {
        'CUSTOMER NAME': [],
        'CURR INT RCVB': [],
        'UNEARNED BLENDED': [],
        'BLEND NET INV': [],
        'LEASE NUMBER': [],
        'GROSS CONTRACT': [],
        'CURR RENT RCVB': [],
        'UNEARN FIN': [],
        'END DEPOSIT': [],
        'SEC DEPOSIT': [],
        'LEASE PYMTS': [],
        'TOTAL': [],
        'CONTRACT STAT': [],
        'PAYMENTS RCVD': [],
        'REM RENT RCVB': [],
        'UNEARN RESID': [],
        'PROV LOSS': [],
        'NET RESERVE': [],
        'UNEARN INC': [],
        'BAL REMAINING': [],
        'RESIDUAL': [],
        'UNPAID INT': [],
        'NET INV': [],
        'UNEARNED IDC': [],
        "LESSOR": [],
    }
    lessors = []
    columns = list(extracted_data_dict.keys())
    # (column name, slot number) pairs, one list per line of a record.
    # "LESSOR" (the last column) is derived from the lease number instead.
    line0 = list(zip(columns[0:4], [0, 3, 4, 5]))
    line1 = list(zip(columns[4:12], range(0, 8)))
    line2 = list(zip(columns[12:19], range(0, 7)))
    line3 = list(zip(columns[19:-1], range(1, 6)))
    slot_maps = (line0, line1, line2, line3)

    for l in slot_maps:
        print(f"\n{l}")

    data_extractor = create_line_divider([18, 32, 50, 66, 84, 100, 117])
    for index, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        # Numeric first slots cannot hold a contract number.
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue

        data_section = lines[index - 1:index + 3]

        # No "." on the line above the contract number: use the line before it.
        if "." not in data_section[0]:
            data_section[0] = lines[index - 2]
        for pos, section_line in enumerate(data_section):
            if "." in section_line:
                continue
            # Drop the figure-less line, shift the rest up and pull the
            # next report line into the last position.
            for i in range(pos, len(data_section) - 1):
                data_section[i] = data_section[i + 1]
            data_section[3] = lines[index + 3]

        for section_index, slot_map in enumerate(slot_maps):
            section_text = data_section[section_index]
            for column, slot in slot_map:
                extracted_data_dict[column].append(
                    data_extractor(slot, section_text, False)
                )

        # Lessor code = first three characters of the lease number.
        lessor_code = extracted_data_dict["LEASE NUMBER"][-1][0:3]
        extracted_data_dict["LESSOR"].append(lessor_code)
        if lessor_code not in lessors:
            print(lessor_code)
            lessors.append(lessor_code)

    print(lessors)
    for c in columns:
        print(f"C: {c} | {len(extracted_data_dict[c])}")
    print(lessors)
    dataframe = pd.DataFrame(extracted_data_dict)

    summary_series = []
    for lessor in lessors:
        # drop() returns a new frame, so the filtered selection is never
        # mutated in place.
        reduced_df = dataframe.loc[dataframe["LESSOR"] == lessor].drop(
            columns=["CUSTOMER NAME", "LEASE NUMBER", "CONTRACT STAT"]
        )
        # Blank cells and these text fragments are excluded from the totals.
        # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
        reduced_df = reduced_df.replace("", np.nan)
        reduced_df = reduced_df.replace("REVOLV", np.nan)
        reduced_df = reduced_df.replace("ING ACCOUNT", np.nan)
        summation = reduced_df.sum(skipna=True, axis=0)
        summation["LESSOR"] = lessor
        summation["CONTRACT COUNT"] = len(reduced_df.index)
        summary_series.append(summation)

    if summary_series:
        summary_df = pd.concat(summary_series, axis=1).transpose().set_index("LESSOR")
    else:
        # No contracts found: pd.concat([]) would raise, so write an empty summary.
        summary_df = pd.DataFrame()
    prt(summary_df)
    with pd.ExcelWriter(save_name) as writer:
        dataframe.to_excel(writer, index=False, sheet_name="data")
        summary_df.to_excel(writer, index=True, sheet_name="Summary")
    return dataframe
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Ad-hoc driver: read the raw report (undecodable bytes are replaced rather
# than raising), run the extraction and pretty-print the resulting frame.
# NOTE(review): this runs on import as well; consider an
# `if __name__ == "__main__":` guard if the module is ever imported.
with open(
    "/config/workspace/LEAF/IL Extract SRC/2022.05.20 Net Investment",
    errors="replace",
) as rep_file:
    report = rep_file.read()

prt(net_invest_trial_balance(report, "520_NI_TEST.xlsx"))
||||
@ -0,0 +1,92 @@ |
||||
import os |
||||
import pandas as pd |
||||
from datetime import datetime as dt, timedelta |
||||
import sys, getopt |
||||
import re |
||||
from pathlib import Path |
||||
import time |
||||
from pprint import pprint as prt |
||||
import numpy as np |
||||
|
||||
# Pattern for a contract/lease number: 3 digits, 7 digits, 3 digits, dash-separated.
# Raw string so that "\d" reaches the regex engine instead of being parsed as a
# (deprecated, invalid) string escape sequence.
contract_number_regex = r"\d{3}-\d{7}-\d{3}"
||||
|
||||
def create_line_divider(breakage_list: list):
    """
    Build an extractor that slices fixed-width lines at the given break points.

    breakage_list holds the character offsets at which a line is split.
    N break points define N + 1 slots: slot 0 runs from the start of the
    line to the first break point, slot k runs from break point k-1 to
    break point k, and the last slot runs to the end of the line.

    Example
        Given breakage_list [10, 20, 30]
        slot 0 yields characters 0 - 10 of the string,
        slot 1 yields characters 10 - 20,
        slot 3 yields everything from character 30 onward.

    Returns the extract_line_slot function bound to breakage_list.
    """
    def extract_line_slot(slot_num: int, line_string: str, debug: bool = False):
        """
        Pull one slot out of line_string using the break points captured
        from create_line_divider.

        ONLY USE THIS FUNCTION THROUGH CREATION USING 'create_line_divider'

        The slice is stripped of surrounding whitespace and of thousands
        separators (","). If what remains parses as a float it is returned
        as a float; otherwise the cleaned string is returned.

        Raises IndexError when slot_num is greater than the number of
        break points.
        """
        # Explicit check instead of `assert`, which disappears under `python -O`.
        if slot_num > len(breakage_list):
            raise IndexError(
                f"slot_num {slot_num} out of range for {len(breakage_list) + 1} slots"
            )
        low_range = 0 if slot_num == 0 else breakage_list[slot_num - 1]
        if slot_num == len(breakage_list):
            high_range = len(line_string)
        else:
            high_range = breakage_list[slot_num]
        data = line_string[low_range:high_range].strip().replace(",", "")
        try:
            # NOTE: float() also accepts text such as "nan" or "inf"; a cell
            # holding exactly that text comes back as a float.
            data = float(data)
        except ValueError:
            # Not numeric: keep the cleaned text.
            pass
        if debug:
            print(f"Slot num: {slot_num} | Low: {low_range} | High: {high_range} | Data: {data}")
        return data

    return extract_line_slot
||||
|
||||
def renewal_net_invest_trial_balance(report: str, save_name: str):
    """
    Extract per-contract figures from a fixed-width renewal report and
    save them to an Excel file.

    A line whose first slot (characters 0 - 21) contains a contract number
    marks a record. The record is read from three lines: the line above
    the match, the matching line and the line below it.

    Parameters
        report: full text of the report.
        save_name: path of the Excel file to write (single sheet, no index).

    Returns the per-contract DataFrame.

    Raises IndexError if a record's window runs past the available lines.
    """
    lines = report.splitlines()
    data_extractor = create_line_divider([21, 29, 43, 58, 71, 88, 99, 113])
    extracted_data_dict = {
        'CUSTOMER NAME': [],
        'TYPE': [],
        'GROSS RENEWAL': [],
        'REMAINING BAL': [],
        'FINANCED RES': [],
        'REMAINING RES': [],
        'LEASE PYMTS': [],
        'CONTRACT NUMBER': [],
        'RENEWAL': [],
        'PAYMENTS RCVD': [],
        'CUR RENT RCVB': [],
        'UNEARNED RIN': [],
        'SECURITY DEP': [],
        'NET INVEST': [],
        'UNEARN INCOME': [],
        'TOTAL': [],
        'REM RENT RCVB': [],
        'UNPAID RES': [],
    }
    columns = list(extracted_data_dict.keys())
    # (column name, slot number) pairs, one list per line of a record.
    line0 = list(zip(columns[0:7], [0, 1, 2, 3, 4, 5, 7]))
    line1 = list(zip(columns[7:16], range(0, 9)))
    line2 = list(zip(columns[16:], [3, 4]))
    slot_maps = (line0, line1, line2)

    for index, text in enumerate(lines):
        slot1 = data_extractor(0, text, False)
        # Numeric first slots cannot hold a contract number.
        if not isinstance(slot1, str):
            continue
        if re.search(contract_number_regex, slot1) is None:
            continue

        data_section = lines[index - 1:index + 2]

        # Diagnostic output: echo each line of the record, and dump the
        # whole window whenever one of its lines has no "." in it.
        for section_line in data_section:
            print(section_line)
            if "." not in section_line:
                for number, dumped_line in enumerate(data_section):
                    print(f"\n{number}: {dumped_line}")
                print('\n')

        for section_index, slot_map in enumerate(slot_maps):
            section_text = data_section[section_index]
            for column, slot in slot_map:
                extracted_data_dict[column].append(
                    data_extractor(slot, section_text)
                )

    dataframe = pd.DataFrame(extracted_data_dict)
    dataframe.to_excel(save_name, index=False)
    return dataframe
||||
|
||||
|
||||
# Ad-hoc driver: read the raw report (undecodable bytes are replaced rather
# than raising), run the extraction and pretty-print the resulting frame.
# NOTE(review): this runs on import as well; consider an
# `if __name__ == "__main__":` guard if the module is ever imported.
with open(
    "/config/workspace/LEAF/IL Extract SRC/2022.05.20 Renewal Net Investment",
    errors="replace",
) as rep_file:
    report = rep_file.read()

prt(renewal_net_invest_trial_balance(report, "rn_TESTING.xlsx"))
||||
Loading…
Reference in new issue