Parses portfolio-related IL outputs to Excel
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PortfolioParser/getcol.py

20 lines
825 B

import re
import pandas as pd
# One header token: a run of word characters, '.', '/' or '#', followed by a
# single whitespace character (that whitespace is part of the match).
# Raw string so the backslash escapes reach the regex engine without
# triggering Python's invalid-escape-sequence warning.
# NOTE(review): because trailing whitespace is required, a token that ends
# the line is not captured -- confirm the report lines are padded.
_COLUMN_TOKEN = re.compile(r'(\w|\.|/|#)+\s')

# A line must be longer than this (after stripping) to be used as the header.
_MIN_HEADER_WIDTH = 50


def _header_columns(report):
    """Return the tokens of the first wide line in *report* with their offsets.

    Every scanned line is echoed (stripped) to stdout. The first line whose
    stripped length exceeds ``_MIN_HEADER_WIDTH`` is tokenised, each match is
    echoed, and scanning stops; later lines are ignored.

    Args:
        report: Full text of one report.

    Returns:
        A dict of three parallel lists: ``"ColName"`` holds the matched text
        (including its trailing whitespace character), ``"start"`` and
        ``"end"`` hold the offsets of each match within the unstripped line.
        All three lists are empty when no line is wide enough.
    """
    columns = {"ColName": [], "start": [], "end": []}
    for line in report.splitlines():
        stripped = line.strip()
        print(stripped)
        if len(stripped) <= _MIN_HEADER_WIDTH:
            continue
        # Offsets are taken from the unstripped line so that leading
        # whitespace still counts towards the column positions.
        for match in _COLUMN_TOKEN.finditer(line):
            print(match)
            columns["ColName"].append(match.group())
            columns["start"].append(match.start())
            columns["end"].append(match.end())
        # Only the first wide line is used.
        break
    return columns


# For each report, write the header token positions to Outputs/<name>.csv.
for report_name in ["ASSET", "CUST", "DOB"]:
    # errors="replace" keeps undecodable bytes from aborting the read.
    with open(f"Inputs/{report_name}", errors="replace") as report_file:
        report = report_file.read()
    # Replace '^' with a space; the original author notes that these
    # characters cause errors.
    report = report.replace("^", " ")
    pd.DataFrame(_header_columns(report)).to_csv(f"Outputs/{report_name}.csv")