You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
208 lines
7.7 KiB
208 lines
7.7 KiB
import re
|
|
from pandas import DataFrame
|
|
from typing import Union
|
|
from logging import debug as dbg, getLogger, exception as exc, FileHandler, StreamHandler
|
|
import win32clipboard
|
|
import argparse as ap
|
|
from pathlib import Path
|
|
|
|
|
|
def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
    """Turn scenario XML into a table with one row per ``<Scenario>``.

    Every ``<Condition>`` and ``<UpdateField>`` found inside a scenario
    becomes a column named after its ``Id`` attribute. Scenarios that do not
    mention a column get an empty string in that cell.

    Args:
        xmlStr: Raw XML text containing ``<Scenario Seq="N">`` elements.

    Returns:
        A DataFrame indexed by ``SEQ`` on success. If building the table
        fails, or the resulting table is empty, the exception object is
        *returned* (not raised) so the caller can display it.

    Raises:
        ValueError: If an ``<UpdateField>`` has no ``<Value>`` made only of
            word characters and dots.
    """
    # A scenario runs from its opening tag to the last "</S" that precedes
    # the next "<Sc"; group 1 is the Seq number.
    scenario_re = re.compile(r'<Scenario Seq="(\d{1,3})">(?:(?!<Sc)[\s\S])*</S')
    condition_re = re.compile(
        r'<Condition Id="(?P<id>\w+)" Group="\w+" CompareTo="(?:Value|Range)">'
        r'(?P<body>(?:(?!</C)[\s\S])*)</Condition>'
    )
    update_re = re.compile(
        r'<UpdateField Id="(?P<id>\w+)" Group="\w+" UIRequired="\d+" '
        r'UIDisabled="\d+" ForceUpdate="\d+">\n?(?:(?!</U)[\s\S])*</Up'
    )
    # A single character class avoids the nested quantifier of "(\w+|\.)+",
    # which can backtrack exponentially on long values.
    value_re = re.compile(r'>([\w.]+)</Value>')

    columns = {"SEQ": []}

    for scenario in scenario_re.finditer(xmlStr):
        scenario_text = scenario.group()
        seq = scenario.group(1)
        dbg(f"\nSeq: {seq}")
        dbg(f"\n\nSenario Group: {scenario_text}")

        row = {"SEQ": int(seq)}

        for condition in condition_re.finditer(scenario_text):
            field_id = condition.group("id")
            # Everything between the opening and closing tag, trimmed; this
            # works whether the value sits on the tag line or on its own lines.
            value = condition.group("body").strip()
            dbg(f"SEQ: {seq} | {field_id}")
            dbg(f"SEQ: {seq} | {value}")
            row[field_id] = value

        for update in update_re.finditer(scenario_text):
            update_text = update.group()
            field_id = update.group("id")
            dbg(f"{seq} | Update: {update_text}")
            value_match = value_re.search(update_text)
            if value_match is None:
                raise ValueError(
                    f"Scenario {seq}: UpdateField {field_id!r} has no simple "
                    "<Value> (only letters, digits, underscores and dots "
                    "are recognised)."
                )
            value = value_match.group(1)
            dbg(f"{seq} UPDATE | {field_id} : {value}")
            row[field_id] = value

        # Merge this scenario into the table. Columns that already exist get
        # the scenario's value, or '' when the scenario does not mention them.
        for key, cells in columns.items():
            cells.append(row.get(key, ''))

        # Columns first seen in this scenario are padded with one '' per
        # earlier row. Counting rows (rather than trusting the Seq number)
        # keeps all columns the same length when Seq values skip numbers
        # or do not start at 1.
        earlier_rows = len(columns["SEQ"]) - 1
        for key, value in row.items():
            if key not in columns:
                columns[key] = [''] * earlier_rows + [value]
                dbg(f"New key: {key} | {columns[key]}")

        dbg(f"{seq} | {columns}\n")

    dbg(columns)
    if getLogger().level == 10:
        for key, cells in columns.items():
            dbg(f"{key} : {len(cells)}")

    try:
        table = DataFrame(columns)
        table.set_index('SEQ', inplace=True)
        dbg(table)
        if table.empty:
            # Exception() accepts no keyword arguments, so only the message
            # is passed.
            raise Exception("No data found...")
        return table
    except Exception as e:
        return e
|
|
|
|
|
|
def process_clipboard() -> str:
    """Interactively fetch the XML to parse from the Windows clipboard.

    Shows the current clipboard content and asks the user to confirm it.
    The loop repeats until the answer starts with ``y`` or ``1``; after any
    other answer the user is asked to copy the XML and press enter. Answering
    ``debug`` switches the root logger to DEBUG level and asks again.

    Returns:
        The clipboard content the user confirmed, or the string ``"None"``
        if the clipboard could not be read and the user confirmed anyway.
    """
    while True:
        win32clipboard.OpenClipboard()
        try:
            xml = win32clipboard.GetClipboardData()
        except Exception:
            # Presumably raised when the clipboard holds no text; verify
            # against the pywin32 docs. KeyboardInterrupt is deliberately
            # not swallowed here.
            xml = "None"
        finally:
            # Always release the clipboard, even if reading was interrupted.
            win32clipboard.CloseClipboard()

        print(f"\n\nYour current clipboard is as follows:")
        print(xml)
        yn = input("\nIs this the XML you'd like to parse? (y/n)\n >")
        answer = yn.strip().lower()

        if answer == "debug":
            getLogger().setLevel(10)
            print("\nYou have now entered debug mode...")

        # Only answers that *start* with y/1 count as yes, so replies such
        # as "not yet" are no longer mistaken for confirmation.
        if answer.startswith(("y", "1")):
            return xml

        input("Please copy the xml then press enter...")
|
|
|
|
|
|
def alter_suffix(p: Path, desired: str) -> Path:
    """Return *p* with its final suffix set to *desired*.

    The parent directory is preserved, only the last suffix is replaced, and
    a path without any suffix simply gets *desired* appended.

    Args:
        p: Path whose extension should be changed.
        desired: Target suffix including the leading dot, e.g. ``".xlsx"``.

    Returns:
        *p* itself when it already ends in *desired*, otherwise a new path
        with the suffix swapped.
    """
    if p.suffix == desired:
        return p
    # with_suffix touches only the final extension; string replacement on the
    # file name would also rewrite earlier occurrences and breaks when the
    # suffix is empty.
    return p.with_suffix(desired)
|
|
|
|
def main(xml: str) -> DataFrame:
    """Parse *xml* into a table, show it and copy it to the clipboard.

    If parsing fails, the error returned by ``create_table`` is printed and
    the user is asked to try again; the next attempt reads fresh XML from the
    clipboard via ``process_clipboard`` instead of re-parsing the same text,
    which could never succeed.

    Args:
        xml: XML text to parse first.

    Returns:
        The parsed table.
    """
    while True:
        table: Union[DataFrame, Exception] = create_table(xml)
        if isinstance(table, DataFrame):
            break
        print(f"\n\nENCOUNTERED ERROR!:\n{table}\n")
        input("Please try again...")
        # Fetch new input for the retry; the failed text is discarded.
        xml = process_clipboard()

    print(f"Table sample:")
    print(table)
    table.to_clipboard()
    print("This table is now in your clipboard to paste into excel.")
    return table
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read XML from a file or the clipboard, parse it,
    # and write the result to an Excel workbook.

    # Only errors (level 40) are reported unless debug mode lowers the level.
    getLogger().setLevel(40)

    try:
        parser = ap.ArgumentParser(
            prog="XML Parser",
            description='''This program parses XML data into a pandas DataFrame.
The XML data can come from an input file or the clipboard.
If an output file is specified, the DataFrame will be written to this file in Excel format.
If debug mode is enabled, detailed logging information will be written to "xml_parse.log".'''
        )

        parser.add_argument(
            "-i", "--input",
            help="Path to the XML file to parse. If not specified, the program will ask for XML data from the clipboard."
        )

        parser.add_argument(
            "-o", "--output",
            help="Path to the output Excel file. If not specified, the DataFrame will be written to 'Parsed XML.xlsx' in the current directory."
        )

        parser.add_argument(
            "--debug", action="store_true",
            help="Enable debug mode. Detailed logging information will be written to 'xml_parse.log'."
        )

        args = parser.parse_args()

        if args.debug:
            # Everything goes to the log file; the console still shows only
            # errors.
            logger = getLogger()
            logger.setLevel(10)
            f_handler = FileHandler(
                Path("xml_parse.log")
            )
            f_handler.setLevel(10)
            s_handler = StreamHandler()
            s_handler.setLevel(40)
            logger.addHandler(f_handler)
            logger.addHandler(s_handler)

        if args.input is not None:
            i_file: Path = Path(args.input)

            if not i_file.exists():
                raise ValueError(f"{i_file} could not be found. Make sure the path is correct.")
            elif i_file.suffix != ".xml":
                raise NotImplementedError(f"This program can only parse .xml not {i_file}!")

            with open(i_file) as xml_file:
                xml_str: str = xml_file.read()
            # Default output name is derived from the input file; the suffix
            # is switched to .xlsx below.
            output_path = Path(i_file.parent, i_file.name)
        else:
            xml_str = process_clipboard()
            output_path = Path("Parsed XML.xlsx")

        xml_df: DataFrame = main(xml_str)

        if args.output is not None:
            # Path.suffix is read-only, so the extension is not assigned
            # here; alter_suffix below normalises it for every case.
            output_path = Path(args.output)

        output_path = alter_suffix(output_path, ".xlsx")
        # NOTE(review): index=False leaves the SEQ index out of the workbook
        # while freeze_panes pins the first column - confirm this is intended.
        xml_df.to_excel(output_path, freeze_panes=(0,1), index=False)
        print(f"Processing Complete!\nOutput data available here: {output_path}")
        input("\n\nPress any key to exit.")

    except Exception as e:
        # Top-level boundary: keep the console window open so the user can
        # read and report the error.
        print(f"The program failed to start due to the following exception:\n{e}")
        input(f"Please make note of the error before closing so that you can report it.")
|
|
|