Fixed failure on large DataFrames | Excel save moved into the extraction functions

v3.1
Griffiths Lott 4 years ago
parent 87667c0fa3
commit 6a9811fbc0
  1. 73
      ILExtract.py
  2. 6
      main.py

@ -54,7 +54,7 @@ class ILReport:
return 1
try:
# Run the associated method to extract the data and get the dataframe
dataframe = self.x_method(report)
dataframe = self.x_method(report, self.output_location)
try:
assert(len(dataframe) > 1)
except Exception as e:
@ -65,14 +65,14 @@ class ILReport:
print(f"{self.output_name} failed to process:\n{e}")
self.successful = False
return 1
try:
# Save the dataframe as an excel document
dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False)
except Exception as e:
self.successful = False
print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}")
return 1
self.successful = True
# try:
# # Save the dataframe as an excel document
# dataframe.to_excel(f"{self.output_location}/{self.output_name}_{dt.now().strftime('%Y%m%d-%H%M')}.xlsx", index = False)
# except Exception as e:
# self.successful = False
# print(f"{self.output_name} failed to save to excel!\n{dataframe}\n{e}")
# return 1
# self.successful = True
return 0
def process(self):
try:
@ -85,7 +85,7 @@ class ILReport:
return 1
try:
# Run the associated method to extract the data and get the dataframe
dataframe = self.x_method(report)
dataframe = self.x_method(report, self.output_name)
try:
assert(len(dataframe) > 1)
except Exception as e:
@ -161,7 +161,7 @@ COMMON REGEX COMPONENTS
"""
def ach(report: str):
def ach(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
"ContractNumber" : [],
@ -177,10 +177,11 @@ def ach(report: str):
for line in enumerate(lines):
if (re.search(contract_number_regex, line[1]) != None) & (re.search(bank_number_regex, line[1]) != None):
[extracted_data_dict[columns[c]].append(data_extractor(c, line[1])) for c in range(0, len(columns))]
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
def disposition(report: str):
def disposition(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
"ContractNumber" : [],
@ -201,10 +202,12 @@ def disposition(report: str):
if re.search(contract_number_regex, data_extractor(0,line[1])):
[extracted_data_dict[columns[c]].append(data_extractor(c,line[1])) for c in range(0, len(columns)-1)]
extracted_data_dict["Customer Name"].append(lines[line[0]+1].strip())
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
def gainloss(report: str):
def gainloss(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
'REM RENT RCVB' : [],
@ -286,10 +289,11 @@ def gainloss(report: str):
disp_descriptoin.append(" ".join(disp_split[1:]))
df["DISPOSITION CODE"] = disp_code
df["DISPOSITION DESC"] = disp_descriptoin
df.to_excel(save_name, index=False, engine="xlsxwrtier")
return df
# Works for Net-inv-loans & NIV-after
def net_invest_trial_balance(report: str):
def net_invest_trial_balance(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
'CUSTOMER NAME' : [],
@ -333,10 +337,12 @@ def net_invest_trial_balance(report: str):
[extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1]
[extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2]
[extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[3])) for c in line3]
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
def lockbox(report: str):
def lockbox(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
"CustomerName" : [],
@ -387,10 +393,12 @@ def lockbox(report: str):
i += 1
# Once it hits, add the name to the dict
extracted_data_dict["CustomerName"].append(data_extractor(7,lines[line[0]+i]))
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
def minv(report: str):
def minv(report: str, save_name: str):
lines = report.splitlines()
data_extractor = create_line_divider([15,32,52,71,83,107,116,128])
extracted_data_dict = {
@ -409,10 +417,12 @@ def minv(report: str):
if re.search(contract_number_regex, line[1]) != None:
[extracted_data_dict[columns[c]].append(data_extractor(c,line[1],debug=False)) for c in range(0,len(columns))]
# All the list lengths need to be the same, so if anything was missed the DataFrame will fail to build
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
# Good for PUB_WIRES, VMCC, PBP_EPAY, returned check
def payment_transactions(report: str):
def payment_transactions(report: str, save_name: str):
lines = report.splitlines()
data_extractor = create_line_divider([6,33,52,62,80,89,110,121])
extracted_data_dict = {
@ -446,10 +456,12 @@ def payment_transactions(report: str):
extracted_data_dict['CUSTOMER NAME'].append(cname)
inv_no = lines[line[0]+1][79:90].strip()
extracted_data_dict['INV NO'].append(inv_no)
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
def renewal_net_invest_trial_balance(report: str):
def renewal_net_invest_trial_balance(report: str, save_name: str):
lines = report.splitlines()
data_extractor = create_line_divider([21,29,43,58,71,88,99,113])
extracted_data_dict = {
@ -485,10 +497,12 @@ def renewal_net_invest_trial_balance(report: str):
[extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[0])) for c in line0]
[extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[1])) for c in line1]
[extracted_data_dict[c[0]].append(data_extractor(c[1], data_section[2])) for c in line2]
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe
def unapplied(report: str):
def unapplied(report: str, save_name: str):
lines = report.splitlines()
extracted_data_dict = {
"Trans Num" : [],
@ -521,5 +535,6 @@ def unapplied(report: str):
(re.search("\d{2}/\d{2}/\d{4}", str(data_extractor(3,line[1],debug=False))) != None):
[extracted_data_dict[columns[c]].append(data_extractor(c,line[1])) for c in range(0,9)]
[extracted_data_dict[columns[8+c]].append(data_extractor(c,lines[line[0]+1])) for c in range(1,len(columns)-8)]
return pd.DataFrame(extracted_data_dict)
dataframe = pd.DataFrame(extracted_data_dict)
dataframe.to_excel(save_name, index=False, engine="xlsxwrtier")
return dataframe

@ -36,7 +36,7 @@ class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
self.inputFile = inFile[0]
inFileEnd = inFile[0].split('/')[-1]
outputRoot = self.inputFile.removesuffix(inFileEnd)
self.outputFile = f"{outputRoot}{self.reportTypeCB.currentText()}_{dt.now().strftime('%Y%m%d_%H%M')}.csv"
self.outputFile = f"{outputRoot}{self.reportTypeCB.currentText()}_{dt.now().strftime('%Y%m%d_%H%M')}.xlsx"
self.outputFileLE.setText(self.outputFile)
if self.reportTypeCB.currentText().split(" ")[-1].lower() not in self.inputFile.lower():
print("Possibly wrong file type")
@ -96,7 +96,7 @@ Make sure you select the correct report type before processing!")
extraction_function=extract_function,
output_location=self.outputFile,
).process()
dataframe.to_csv(self.outputFile, index=False)
dataframe.to_excel(self.outputFile, index=False, engine="xlsxwriter")
smallDF = dataframe.iloc[0:500,:]
self.inputFilePreview.setText(smallDF.to_html(index=False))
self.openReportButton.setEnabled(True)
@ -110,7 +110,7 @@ Make sure you select the correct report type before processing!")
self.inputFilePreview.setText(df.to_html())
def to_clipboard(self):
df = pd.read_csv(self.outputFile)
df = pd.read_excel(self.outputFile)
df.to_clipboard(excel=True)

Loading…
Cancel
Save