|
|
|
|
@ -1,119 +1,125 @@ |
|
|
|
|
try: |
|
|
|
|
import re |
|
|
|
|
from pandas import DataFrame |
|
|
|
|
from typing import Union |
|
|
|
|
from logging import debug as dbg, getLogger |
|
|
|
|
import win32clipboard |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
    """Build a table with one row per ``<Scenario>`` element found in ``xmlStr``.

    Each scenario contributes its ``Seq`` number plus one column per
    ``<Condition>`` and ``<UpdateField>`` Id it contains. A column that a
    scenario does not mention is left as an empty string for that row.

    Args:
        xmlStr: Raw XML text to scan. It is matched with regular expressions,
            not parsed as a document.

    Returns:
        A DataFrame indexed by ``SEQ`` on success. On failure the exception is
        *returned* (not raised) so the caller can display it and retry: this
        happens when no data is found, when an update field has no parsable
        ``<Value>``, or when the DataFrame cannot be built.
    """
    SCENARIO_REGEX = r'<Scenario Seq="\d{1,3}">((?!<Sc)(.|\n))*</S'
    CONDITION_REGEX = r'<Condition Id="\w+" Group="\w+" CompareTo="(Value|Range)">((?!</C)(.|\n))*</Condition>'
    UPDATE_REGEX = r'<UpdateField Id="\w+" Group="\w+" UIRequired="\d+" UIDisabled="\d+" ForceUpdate="\d+">\n?((?!</U)(.|\n))*</Up'

    dataDict = {"SEQ": []}

    for scenario in re.finditer(SCENARIO_REGEX, xmlStr):
        scenarioText = scenario.group()
        # The first quoted number in the scenario is its Seq attribute.
        seq = re.search(r'"\d{1,3}"', scenarioText).group()[1:-1]
        dbg(f"\nSeq: {seq}")
        dbg(f"\n\nSenario Group: {scenarioText}")

        conditions = list(re.finditer(CONDITION_REGEX, scenarioText))
        updates = list(re.finditer(UPDATE_REGEX, scenarioText))
        dbg(f"{seq} | Updates: {updates}")
        dbg(f"conditions:\n{conditions}")

        scenarioDict = {"SEQ": int(seq)}

        for condition in conditions:
            conditionText = condition.group()
            # The first quoted string of a condition is its Id attribute.
            fieldId = re.search(r'"[^"]*"', conditionText).group()[1:-1]
            dbg(f"SEQ: {seq} | {fieldId}")

            # Single-line form: everything after the opening tag up to the
            # last "<" on that line.
            valueMatch = re.search(r'e">(.*)<', conditionText)
            if valueMatch is not None:
                value = valueMatch.group(1)
            else:
                # Multi-line form. Anchor on the `e">` that closes the opening
                # tag (CompareTo="Value"/"Range") so an Id ending in "e" cannot
                # be mistaken for it, and take the whole body so no leading
                # character is lost.
                valueMatch = re.search(r'e">((?:(?!</C)(?:.|\n))*)</C', conditionText)
                value = valueMatch.group(1).strip()
            dbg(f"SEQ: {seq} | {valueMatch}")
            dbg(f"SEQ: {seq} | {value}")
            scenarioDict[fieldId] = value

        for update in updates:
            updateText = update.group()
            dbg(f"{seq} | Update: {updateText}")
            fieldId = re.search(r'"\w+"', updateText).group()[1:-1]
            # `[\w.]+` matches the same text as `(\w+|\.)+` without the nested
            # quantifier, which can backtrack exponentially on long values.
            valueMatch = re.search(r">([\w.]+)</Value>", updateText)
            if valueMatch is None:
                # Report the problem through the return value, like every
                # other failure of this function, instead of crashing on None.
                return ValueError(
                    f"Scenario {seq}: update field '{fieldId}' has no parsable <Value>."
                )
            value = valueMatch.group(1)
            dbg(f"{seq} UPDATE | {fieldId} : {value}")
            scenarioDict[fieldId] = value

        # Merge this scenario into the table: every known column gets a value
        # for the new row (blank when the scenario does not mention it).
        for key, column in dataDict.items():
            dbg(column)
            column.append(scenarioDict.get(key, ""))

        # Columns first seen in this scenario are back-filled with blanks for
        # the rows already present. The row count, not the Seq number, decides
        # how many blanks are needed, so Seq values need not be 1..n.
        rowCount = len(dataDict["SEQ"])
        for key, value in scenarioDict.items():
            if key not in dataDict:
                dataDict[key] = [""] * (rowCount - 1) + [value]
                dbg(f"New key: {key} | {dataDict[key]}")

        dbg(f"{seq} | {dataDict}\n")

    dbg(dataDict)
    if getLogger().level == 10:
        for key, column in dataDict.items():
            dbg(f"{key} : {len(column)}")

    try:
        table = DataFrame(dataDict)
        table.set_index("SEQ", inplace=True)
        dbg(table)
    except Exception as e:
        return e

    if table.empty:
        return Exception("No data found...")
    return table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_clipboard() -> str: |
|
|
|
|
correct = False |
|
|
|
|
while not correct: |
|
|
|
|
win32clipboard.OpenClipboard() |
|
|
|
|
import re |
|
|
|
|
from pandas import DataFrame |
|
|
|
|
from typing import Union |
|
|
|
|
from logging import debug as dbg, getLogger, exception as exc, FileHandler, StreamHandler |
|
|
|
|
import win32clipboard |
|
|
|
|
import argparse as ap |
|
|
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
    """Build a table with one row per ``<Scenario>`` element found in ``xmlStr``.

    Each scenario contributes its ``Seq`` number plus one column per
    ``<Condition>`` and ``<UpdateField>`` Id it contains. A column that a
    scenario does not mention is left as an empty string for that row.

    Args:
        xmlStr: Raw XML text to scan. It is matched with regular expressions,
            not parsed as a document.

    Returns:
        A DataFrame indexed by ``SEQ`` on success. On failure the exception is
        *returned* (not raised) so the caller can display it and retry: this
        happens when no data is found, when an update field has no parsable
        ``<Value>``, or when the DataFrame cannot be built.
    """
    SCENARIO_REGEX = r'<Scenario Seq="\d{1,3}">((?!<Sc)(.|\n))*</S'
    CONDITION_REGEX = r'<Condition Id="\w+" Group="\w+" CompareTo="(Value|Range)">((?!</C)(.|\n))*</Condition>'
    UPDATE_REGEX = r'<UpdateField Id="\w+" Group="\w+" UIRequired="\d+" UIDisabled="\d+" ForceUpdate="\d+">\n?((?!</U)(.|\n))*</Up'

    dataDict = {"SEQ": []}

    for scenario in re.finditer(SCENARIO_REGEX, xmlStr):
        scenarioText = scenario.group()
        # The first quoted number in the scenario is its Seq attribute.
        seq = re.search(r'"\d{1,3}"', scenarioText).group()[1:-1]
        dbg(f"\nSeq: {seq}")
        dbg(f"\n\nSenario Group: {scenarioText}")

        conditions = list(re.finditer(CONDITION_REGEX, scenarioText))
        updates = list(re.finditer(UPDATE_REGEX, scenarioText))
        dbg(f"{seq} | Updates: {updates}")
        dbg(f"conditions:\n{conditions}")

        scenarioDict = {"SEQ": int(seq)}

        for condition in conditions:
            conditionText = condition.group()
            # The first quoted string of a condition is its Id attribute.
            fieldId = re.search(r'"[^"]*"', conditionText).group()[1:-1]
            dbg(f"SEQ: {seq} | {fieldId}")

            # Single-line form: everything after the opening tag up to the
            # last "<" on that line.
            valueMatch = re.search(r'e">(.*)<', conditionText)
            if valueMatch is not None:
                value = valueMatch.group(1)
            else:
                # Multi-line form. Anchor on the `e">` that closes the opening
                # tag (CompareTo="Value"/"Range") so an Id ending in "e" cannot
                # be mistaken for it, and take the whole body so no leading
                # character is lost.
                valueMatch = re.search(r'e">((?:(?!</C)(?:.|\n))*)</C', conditionText)
                value = valueMatch.group(1).strip()
            dbg(f"SEQ: {seq} | {valueMatch}")
            dbg(f"SEQ: {seq} | {value}")
            scenarioDict[fieldId] = value

        for update in updates:
            updateText = update.group()
            dbg(f"{seq} | Update: {updateText}")
            fieldId = re.search(r'"\w+"', updateText).group()[1:-1]
            # `[\w.]+` matches the same text as `(\w+|\.)+` without the nested
            # quantifier, which can backtrack exponentially on long values.
            valueMatch = re.search(r">([\w.]+)</Value>", updateText)
            if valueMatch is None:
                # Report the problem through the return value, like every
                # other failure of this function, instead of crashing on None.
                return ValueError(
                    f"Scenario {seq}: update field '{fieldId}' has no parsable <Value>."
                )
            value = valueMatch.group(1)
            dbg(f"{seq} UPDATE | {fieldId} : {value}")
            scenarioDict[fieldId] = value

        # Merge this scenario into the table: every known column gets a value
        # for the new row (blank when the scenario does not mention it).
        for key, column in dataDict.items():
            dbg(column)
            column.append(scenarioDict.get(key, ""))

        # Columns first seen in this scenario are back-filled with blanks for
        # the rows already present. The row count, not the Seq number, decides
        # how many blanks are needed, so Seq values need not be 1..n.
        rowCount = len(dataDict["SEQ"])
        for key, value in scenarioDict.items():
            if key not in dataDict:
                dataDict[key] = [""] * (rowCount - 1) + [value]
                dbg(f"New key: {key} | {dataDict[key]}")

        dbg(f"{seq} | {dataDict}\n")

    dbg(dataDict)
    if getLogger().level == 10:
        for key, column in dataDict.items():
            dbg(f"{key} : {len(column)}")

    try:
        table = DataFrame(dataDict)
        table.set_index("SEQ", inplace=True)
        dbg(table)
    except Exception as e:
        return e

    if table.empty:
        return Exception("No data found...")
    return table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_clipboard() -> str:
    """Ask the user to confirm the clipboard content and return it.

    Shows the current clipboard content and asks whether it is the XML to
    parse. Repeats until the answer starts with "y" or "1" (case-insensitive).
    Answering "debug" switches the root logger to DEBUG level and asks again.

    Returns:
        The clipboard content the user confirmed, or the string "None" when
        the clipboard could not be read and the user confirmed anyway.
    """
    while True:
        win32clipboard.OpenClipboard()
        try:
            xml = win32clipboard.GetClipboardData()
        except Exception:
            # Clipboard is empty or holds non-text data; show a placeholder.
            # Only Exception is caught so Ctrl+C still interrupts the program.
            xml = "None"
        finally:
            # Always release the clipboard so other applications can use it.
            win32clipboard.CloseClipboard()

        print("\n\nYour current clipboard is as follows:")
        print(xml)
        yn = input("\nIs this the XML you'd like to parse? (y/n)\n >")

        if yn.lower() == "debug":
            getLogger().setLevel(10)
            print("\nYou have now entered debug mode...")

        # Only an answer that starts with "y" or "1" counts as yes; a "y"
        # somewhere inside another answer (e.g. "not yet") must not confirm.
        if yn.strip().lower().startswith(("y", "1")):
            return xml

        input("Please copy the xml then press enter...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def alter_suffix(p: Path, desired: str) -> Path:
    """Return ``p`` with its file extension set to ``desired``.

    The directory part of ``p`` is kept. A path without an extension gets
    ``desired`` appended; only the final extension is replaced.

    Args:
        p: Path whose extension should be checked.
        desired: Extension including the leading dot, e.g. ".xlsx".

    Returns:
        ``p`` unchanged when it already ends in ``desired``, otherwise a new
        path with the extension swapped.
    """
    if p.suffix == desired:
        return p
    # with_suffix touches only the trailing extension, unlike str.replace,
    # which rewrites every occurrence and misbehaves on an empty suffix.
    return p.with_suffix(desired)
|
|
|
|
|
|
|
|
|
def main(xml: str) -> DataFrame:
    """Parse ``xml`` into a table, retrying from the clipboard on failure.

    The supplied ``xml`` is tried first. If it cannot be parsed, the error is
    printed and the user is asked for new XML via the clipboard until parsing
    succeeds. The resulting table is printed and copied to the clipboard.

    Args:
        xml: XML text to parse, e.g. read from a file or the clipboard.

    Returns:
        The parsed table.
    """
    table: Union[DataFrame, Exception] = create_table(xml)

    # create_table reports failure by returning the exception, so keep asking
    # for fresh XML until an actual DataFrame comes back.
    while not isinstance(table, DataFrame):
        print(f"\n\nENCOUNTERED ERROR!:\n{table}\n")
        xml = process_clipboard()
        table = create_table(xml)

    print("Table sample:")
    print(table)
    table.to_clipboard()
    print("This table is now in your clipboard to paste into excel.")
    return table
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: read XML from a file or the clipboard, parse
    # it into a table and write the table to an Excel file.

    # Only errors are reported unless --debug is passed.
    logger = getLogger()
    logger.setLevel(40)

    try:
        parser = ap.ArgumentParser(
            prog="XML Parser",
            description='''This program parses XML data into a pandas DataFrame.
            The XML data can come from an input file or the clipboard.
            If an output file is specified, the DataFrame will be written to this file in Excel format.
            If debug mode is enabled, detailed logging information will be written to "xml_parse.log".'''
        )

        parser.add_argument(
            "-i", "--input",
            help="Path to the XML file to parse. If not specified, the program will ask for XML data from the clipboard."
        )

        parser.add_argument(
            "-o", "--output",
            help="Path to the output Excel file. If not specified, the DataFrame will be written to 'Parsed XML.xlsx' in the current directory."
        )

        parser.add_argument(
            "--debug", action="store_true",
            help="Enable debug mode. Detailed logging information will be written to 'xml_parse.log'."
        )

        args = parser.parse_args()

        if args.debug:
            # Full detail goes to the log file; the console still shows only
            # errors.
            logger.setLevel(10)
            f_handler = FileHandler(
                Path("xml_parse.log")
            )
            f_handler.setLevel(10)
            s_handler = StreamHandler()
            s_handler.setLevel(40)
            logger.addHandler(f_handler)
            logger.addHandler(s_handler)

        if args.input is not None:
            i_file: Path = Path(args.input)

            if not i_file.exists():
                raise ValueError(f"{i_file} could not be found. Make sure the path is correct.")
            elif i_file.suffix != ".xml":
                raise NotImplementedError(f"This program can only parse .xml not {i_file}!")
            with open(i_file) as xml_file:
                xml_str: str = xml_file.read()
            # Default output is named after the input file; the extension is
            # switched to .xlsx below.
            output_path = Path(i_file.parent, i_file.name)

        else:
            xml_str = process_clipboard()
            output_path = Path("Parsed XML.xlsx")

        xml_df: DataFrame = main(xml_str)
        if args.output is not None:
            # Path.suffix is read-only, so the extension is not assigned here;
            # alter_suffix below normalises it for every case.
            output_path = Path(args.output)
        output_path = alter_suffix(output_path, ".xlsx")
        # NOTE(review): index=False omits the table's index from the sheet. If
        # the index holds data that should be exported (e.g. a SEQ column set
        # by create_table), this drops it -- confirm that is intended.
        xml_df.to_excel(output_path, freeze_panes=(0,1), index=False)
        print(f"Processing Complete!\nOutput data available here: {output_path}")
        input("\n\nPress any key to exit.")

    except Exception as e:
        # Keep the console window open so the user can read the error.
        print(f"The program failed to start do the the following exception:\n{e}")
        input("Please make note of the error before closing so that you can report it.")
|
|
|
|
|