You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
20 lines
825 B
20 lines
825 B
import re

import pandas as pd

# For each report file under Inputs/, locate the first line whose stripped
# length exceeds MIN_HEADER_LENGTH, record every token on it together with its
# character offsets, and write the result to Outputs/<name>.csv.

# Names of the report files read from Inputs/.
REPORT_NAMES = ["ASSET", "CUST", "DOB"]

# A token is a run of word characters, '.', '/' or '#' followed by exactly one
# whitespace character (the whitespace is part of the match).  This must be a
# raw string: "\w", "\." and "\s" are invalid escapes in a normal literal.
# NOTE(review): a final token that is not followed by whitespace is not
# matched, because splitlines() drops the line terminator -- confirm that the
# header lines are padded or that dropping the last token is acceptable.
HEADER_TOKEN = re.compile(r"[\w./#]+\s")

# Lines whose stripped length is at or below this are not treated as the header.
MIN_HEADER_LENGTH = 50

for report_name in REPORT_NAMES:
    # errors="replace" substitutes undecodable bytes instead of raising.
    with open(f"Inputs/{report_name}", errors="replace") as report_file:
        # Removes characters that cause errors
        report = report_file.read().replace("^", " ")

    columns = {"ColName": [], "start": [], "end": []}
    for line in report.splitlines():
        stripped = line.strip()
        print(stripped)
        if len(stripped) <= MIN_HEADER_LENGTH:
            continue

        # Offsets are taken from the unstripped line so they index into the
        # original text; "end" is the position just past the trailing
        # whitespace character included in "ColName".
        for match in HEADER_TOKEN.finditer(line):
            print(match)
            columns["ColName"].append(match.group())
            columns["start"].append(match.start())
            columns["end"].append(match.end())

        # NOTE(review): only the first qualifying line is used; the scan stops
        # here.  The original indentation of this `break` was not recoverable
        # from the source view -- confirm this placement.
        break

    pd.DataFrame(columns).to_csv(f"Outputs/{report_name}.csv")