Added CLI and redid the pyinstaller spec

master
= 3 years ago
parent c285863dd5
commit 5b3c56e51f
Signed by untrusted user who does not match committer: gprog
GPG Key ID: 5BE9BB58D37713F8
  1. 310
      parse_xml.py
  2. BIN
      requirements.txt

@ -1,119 +1,125 @@
try:
import re
from pandas import DataFrame
from typing import Union
from logging import debug as dbg, getLogger
import win32clipboard
def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
    """Parse scenario XML text into a table with one row per ``<Scenario>``.

    Every ``<Condition>`` and ``<UpdateField>`` found inside a scenario
    becomes a column named after the element's ``Id`` attribute, and the
    scenario's ``Seq`` attribute becomes the index. Cells for fields that a
    scenario does not mention are filled with ``''``.

    Args:
        xmlStr: Raw XML text containing ``<Scenario Seq="...">`` elements.

    Returns:
        The populated DataFrame indexed by ``SEQ``. If the table cannot be
        built, or ends up empty, the describing ``Exception`` instance is
        returned (not raised) so the caller can print it and retry.

    Raises:
        AttributeError: If an ``<UpdateField>`` has no ``<Value>`` made up of
            word characters and dots (the value search then finds nothing).
    """
    # Patterns are raw strings (no invalid-escape warnings) and are compiled
    # once instead of being rebuilt for every scenario.
    scenario_re = re.compile(r'<Scenario Seq="\d{1,3}">((?!<Sc)(.|\n))*</S')
    seq_re = re.compile(r'"\d{1,3}"')
    condition_re = re.compile(
        r'<Condition Id="\w+" Group="\w+" CompareTo="(Value|Range)">'
        r'((?!</C)(.|\n))*</Condition>'
    )
    update_re = re.compile(
        r'<UpdateField Id="\w+" Group="\w+" UIRequired="\d+" UIDisabled="\d+" '
        r'ForceUpdate="\d+">\n?((?!</U)(.|\n))*</Up'
    )
    quoted_re = re.compile(r'"[^"]*"')
    quoted_word_re = re.compile(r'"\w+"')
    inline_value_re = re.compile(r'e">(.)*<')
    multiline_value_re = re.compile(r'e"(((?!</C)(.|\n))*)</C')
    # Same language as the former ``(\w+|\.)+`` but without the nested
    # quantifier, which could backtrack exponentially.
    update_value_re = re.compile(r'>[\w.]+</Value>')

    data = {"SEQ": []}

    for scenario in scenario_re.finditer(xmlStr):
        scenario_text = scenario.group()
        # The first quoted 1-3 digit number is the Seq attribute; drop quotes.
        seq = seq_re.search(scenario_text).group()[1:-1]
        dbg(f"\nSeq: {seq}")
        dbg(f"\n\nSenario Group: {scenario_text}")

        row = {"SEQ": int(seq)}

        for condition in condition_re.finditer(scenario_text):
            text = condition.group()
            # The first quoted attribute in the element is its Id.
            field_id = quoted_re.search(text).group()[1:-1]
            inline = inline_value_re.search(text)
            if inline is not None:
                # Value on the same line as the opening tag: strip the
                # leading `e">` and the trailing `<`.
                value = inline.group()[3:-1]
            else:
                # Value starts on a later line: strip `e">` plus the
                # following character, and the trailing `</C`.
                value = multiline_value_re.search(text).group()[4:-3].strip()
            dbg(f"SEQ: {seq} | {field_id} | {value}")
            row[field_id] = value

        for update in update_re.finditer(scenario_text):
            text = update.group()
            field_id = quoted_word_re.search(text).group()[1:-1]
            start, end = update_value_re.search(text).span()
            # Skip the leading `>` and the 8-character `</Value>` tail.
            value = text[start + 1:end - 8]
            dbg(f"{seq} UPDATE | {field_id} : {value}")
            row[field_id] = value

        # Merge this scenario into the column store. Columns that already
        # exist get this row's value (or '' when the scenario lacks it).
        rows_before = len(data["SEQ"])
        for key, column in data.items():
            column.append(row.get(key, ""))
        # Columns first seen in this scenario are back-filled with one blank
        # per row already collected, so every column keeps the same length
        # regardless of how the Seq attributes are numbered.
        for key, value in row.items():
            if key not in data:
                data[key] = [""] * rows_before + [value]
                dbg(f"New key: {key} | {data[key]}")
        dbg(f"{seq} | {data}\n")

    dbg(data)
    if getLogger().level == 10:
        for key, column in data.items():
            dbg(f"{key} : {len(column)}")

    try:
        table = DataFrame(data)
        table.set_index('SEQ', inplace=True)
    except Exception as e:
        return e
    dbg(table)
    if table.empty:
        # Exception() accepts no keyword arguments; passing color/effect here
        # used to raise a TypeError that masked this message.
        return Exception("No data found...")
    return table
def process_clipboard() -> str:
correct = False
while not correct:
win32clipboard.OpenClipboard()
import re
from pandas import DataFrame
from typing import Union
from logging import debug as dbg, getLogger, exception as exc, FileHandler, StreamHandler
import win32clipboard
import argparse as ap
from pathlib import Path
def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
"""Parse scenario XML text into a DataFrame with one row per <Scenario>.

Each <Condition> and <UpdateField> inside a scenario becomes a column keyed
by the element's Id; the scenario's Seq attribute is used as the index.
Returns the DataFrame, or the Exception caught while building it.
"""
# Column store: column name -> list of cell values, one entry per scenario.
dataDict = {
"SEQ": [],
}
# Matches from an opening <Scenario Seq="N"> up to the next "</S".
MATCH_SCENARIO = "<Scenario Seq=\"\d{1,3}\">((?!<Sc)(.|\n))*</S"
senarios = re.finditer(MATCH_SCENARIO,xmlStr)
dbg(senarios)
senario: re.Match
for senario in senarios:
senarioGroup = senario.group()
# First quoted 1-3 digit number in the scenario text is its Seq value.
seqMatch = re.search("\"\d{1,3}\"",senarioGroup).group()
# Drop the surrounding double quotes.
seq = seqMatch[1:-1]
dbg(f"\nSeq: {seq}")
CONDITION_REGEX = r"<Condition Id=\"\w+\" Group=\"\w+\" CompareTo=\"(Value|Range)\">((?!</C)(.|\n))*</Condition>"
UPDATE_REGEX = r"<UpdateField Id=\"\w+\" Group=\"\w+\" UIRequired=\"\d+\" UIDisabled=\"\d+\" ForceUpdate=\"\d+\">\n?((?!</U)(.|\n))*</Up"
c = list(re.finditer(CONDITION_REGEX,senarioGroup))
dbg(f"\n\nSenario Group: {senarioGroup}")
updates = list(re.finditer(UPDATE_REGEX, senarioGroup))
dbg(f"{seq} | Updates: {updates}")
dbg(f"conditions:\n{[cond for cond in c]}")
# Values collected for this one scenario: column name -> value.
senarioDict = {}
senarioDict["SEQ"] = int(seq)
for m in c:
group = m.group()
# The first quoted attribute value in the element is taken as its Id.
idStart, idEnd = re.search("\"[^\"]*\"", group).span()
id = group[idStart+1:idEnd-1]
dbg(f"SEQ: {seq} | {id}")
# Try a value that sits on the same line as the opening tag first.
valueGroup = re.search("e\">(.)*<", group)
if valueGroup == None:
# Fallback for content that starts on a later line; [4:-3] removes `e">`
# plus one further character and the trailing `</C`.
valueGroup = re.search("e\"(((?!</C)(.|\n))*)</C", group)
value = valueGroup.group()[4:-3].strip()
else:
# [3:-1] removes the leading `e">` and the trailing `<`.
value = valueGroup.group()[3:-1]
dbg(f"SEQ: {seq} | {valueGroup}")
dbg(f"SEQ: {seq} | {value}")
senarioDict[id] = value
update: re.Match
for update in updates:
update = update.group()
dbg(f"{seq} | Update: {update}")
idMatch = re.search(r"\"\w+\"",update).span()
dbg(f"ID: {idMatch}")
id = update[idMatch[0]+1:idMatch[1]-1]
# NOTE(review): .span() raises AttributeError when no <Value> made of word
# characters/dots is present in the update element.
valueMatch = re.search(">(\w+|\.)+</Value>", update).span()
dbg(f"value: {valueMatch}")
# +1 skips the leading `>`; -8 removes the trailing `</Value>`.
value = update[valueMatch[0]+1:valueMatch[1]-8]
dbg(f"{seq} UPDATE | {id} : {value}")
senarioDict[id] = value
# Now merge the values from that senario into the main dict
seen = []
for key in dataDict.keys():
dbg(dataDict[key])
try:
# NOTE(review): the lines from here through `return xml` do not reference
# anything in this loop; they look like the tail of an earlier
# process_clipboard() left interleaved by the diff view - confirm against
# the real file before relying on this span.
xml = win32clipboard.GetClipboardData()
except:
xml = "None"
win32clipboard.CloseClipboard()
print(f"\n\nYour current clipboard is as follows:")
print(xml)
yn = input("\nIs this the XML you'd like to parse? (y/n)\n >")
if yn.lower() == "debug":
getLogger().setLevel(10)
print("\nYou have now entered debug mode...")
correct = True if re.search("(?i)y|1", yn) != None else False
if not correct:
input("Please copy the xml then press enter...")
return xml
# Existing column: use this scenario's value, or '' when it has none.
senarioValue = senarioDict[key]
except KeyError:
senarioValue = ''
dataDict[key].append(senarioValue)
seen.append(key)
# Columns first seen in this scenario.
for key in [k for k in senarioDict.keys() if k not in seen]:
# Back-fills int(seq) - 1 blank cells before the new value.
# NOTE(review): this only lines up with the other columns if Seq values
# start at 1 and increase by 1 - confirm, otherwise DataFrame() below
# receives columns of unequal length.
dataFill = ['' for _ in range(1,int(seq))]
dataFill.append(senarioDict[key])
dataDict[key] = dataFill
dbg(f"New key: {key} | {dataDict[key]}")
dbg(f"{seq} | {dataDict}\n")
dbg(dataDict)
# Level 10 is logging.DEBUG: dump per-column lengths only in debug mode.
if getLogger().level == 10:
for key in dataDict.keys():
dbg(f"{key} : {len(dataDict[key])}")
try:
table = DataFrame(dataDict)
table.set_index('SEQ', inplace=True)
dbg(table)
if table.empty:
# NOTE(review): Exception() accepts no keyword arguments, so this raise
# produces a TypeError, which the handler below returns instead of the
# "No data found..." message.
raise Exception("No data found...", color='RED', effect='BOLD')
return table
except Exception as e:
# Errors are handed back to the caller as a value rather than raised.
return e
def process_clipboard() -> str:
    """Interactively fetch XML text from the Windows clipboard.

    Shows the current clipboard content and asks the user to confirm it.
    Any answer containing ``y`` (either case) or ``1`` accepts the content;
    otherwise the user is asked to copy the XML and the prompt repeats.
    Answering ``debug`` switches the root logger to level 10 before asking
    again.

    Returns:
        The confirmed clipboard content, or the string ``"None"`` when the
        clipboard data could not be read.
    """
    while True:
        win32clipboard.OpenClipboard()
        try:
            xml = win32clipboard.GetClipboardData()
        except Exception:
            # Unreadable clipboard: keep going with a placeholder. Catching
            # Exception (not a bare except) lets Ctrl+C still interrupt.
            xml = "None"
        finally:
            # Always release the clipboard, even when interrupted, so other
            # applications are not locked out.
            win32clipboard.CloseClipboard()

        print(f"\n\nYour current clipboard is as follows:")
        print(xml)
        yn = input("\nIs this the XML you'd like to parse? (y/n)\n >")
        if yn.lower() == "debug":
            getLogger().setLevel(10)
            print("\nYou have now entered debug mode...")
        if re.search("(?i)y|1", yn) is not None:
            return xml
        input("Please copy the xml then press enter...")
def alter_suffix(p: Path, desired: str) -> Path:
    """Return *p* with its final suffix replaced by *desired*.

    The parent directory is preserved, only the last suffix is swapped, and
    a path without any suffix simply gains *desired*.

    Args:
        p: Path whose extension should be normalised.
        desired: Target suffix including the leading dot, e.g. ``".xlsx"``.

    Returns:
        *p* itself when it already ends in *desired*, otherwise a new path
        with the suffix replaced.

    Raises:
        ValueError: If *desired* is not a valid suffix or *p* has an empty
            name (raised by ``Path.with_suffix``).
    """
    if p.suffix == desired:
        return p
    # with_suffix edits only the trailing extension; string replacement on
    # p.name dropped the directory and misbehaved on empty/repeated suffixes.
    return p.with_suffix(desired)
def main(xml: str) -> DataFrame:
"""Build the scenario table, show it, copy it to the clipboard, return it.

Loops until create_table() yields a DataFrame, printing the returned error
after each failed attempt.
"""
table = None
# Repeat until create_table() hands back a DataFrame instead of an Exception.
while type(table) != DataFrame:
# NOTE(review): this overwrites the `xml` argument before it is ever read,
# so text passed in by the caller is never parsed - confirm whether the
# first attempt was meant to use the argument.
xml = process_clipboard()
table: Union[DataFrame, Exception] = create_table(xml)
if type(table) != DataFrame:
print(f"\n\nENCOUNTERED ERROR!:\n{table}\n")
# NOTE(review): the next line is a diff hunk header from the page this text
# was taken from, not Python; lines between the two hunks are not shown.
@ -122,7 +128,81 @@ try:
print(f"Table sample:")
print(table)
# Put the table on the system clipboard for pasting into a spreadsheet.
table.to_clipboard()
# NOTE(review): the following four lines (input/except/print/input) appear
# to be the removed side of the diff - presumably the old end of the file;
# verify against the real source.
input("This table is now in your clipboard to paste into excel.")
except Exception as e:
print(f"The program failed to start do the the following exception:\n{e}")
input(f"Please make note of the error before closing so that you can report it.")
print("This table is now in your clipboard to paste into excel.")
return table
if __name__ == "__main__":
    # Command-line entry point: parse arguments, obtain the XML text from a
    # file or the clipboard, build the table and write it to an Excel file.
    # Default to level 40 (ERROR) so debug output stays quiet unless --debug
    # is given. setLevel() returns None, so its result is not kept.
    getLogger().setLevel(40)
    try:
        parser = ap.ArgumentParser(
            prog="XML Parser",
            description='''This program parses XML data into a pandas DataFrame.
            The XML data can come from an input file or the clipboard.
            If an output file is specified, the DataFrame will be written to this file in Excel format.
            If debug mode is enabled, detailed logging information will be written to "xml_parse.log".'''
        )
        parser.add_argument(
            "-i", "--input",
            help="Path to the XML file to parse. If not specified, the program will ask for XML data from the clipboard."
        )
        parser.add_argument(
            "-o", "--output",
            help="Path to the output Excel file. If not specified, the DataFrame will be written to 'Parsed XML.xlsx' in the current directory."
        )
        parser.add_argument(
            "--debug", action="store_true",
            help="Enable debug mode. Detailed logging information will be written to 'xml_parse.log'."
        )
        args = parser.parse_args()

        if args.debug:
            # Everything (level 10) goes to the log file; the console handler
            # only shows level 40 and above.
            logger = getLogger()
            logger.setLevel(10)
            f_handler = FileHandler(Path("xml_parse.log"))
            f_handler.setLevel(10)
            s_handler = StreamHandler()
            s_handler.setLevel(40)
            logger.addHandler(f_handler)
            logger.addHandler(s_handler)

        if args.input is not None:
            i_file: Path = Path(args.input)
            if not i_file.exists():
                raise ValueError(f"{i_file} could not be found. Make sure the path is correct.")
            # Compare case-insensitively so "FILE.XML" is accepted too.
            elif i_file.suffix.lower() != ".xml":
                raise NotImplementedError(f"This program can only parse .xml not {i_file}!")
            with open(i_file) as xml_file:
                xml_str: str = xml_file.read()
            # Default output sits next to the input, same stem, .xlsx suffix.
            output_path = i_file.with_suffix(".xlsx")
        else:
            xml_str = process_clipboard()
            output_path = Path("Parsed XML.xlsx")

        # NOTE(review): confirm main() actually consumes the text passed here
        # rather than prompting for the clipboard again.
        xml_df: DataFrame = main(xml_str)

        if args.output is not None:
            try:
                # Path.suffix is read-only; with_suffix() builds the
                # corrected path instead of assigning to the property.
                output_path = Path(args.output).with_suffix(".xlsx")
            except Exception as e:
                # output_path still holds the default chosen above.
                exc(
                    f"Failed to use passed output file: {args.output}. "
                    f"Using {output_path}.\n{e}"
                )

        # NOTE(review): index=False omits the SEQ index from the sheet while
        # freeze_panes=(0,1) freezes the first column - confirm intended.
        xml_df.to_excel(output_path, freeze_panes=(0,1), index=False)
        print(f"Processing Complete!\nOutput data available here: {output_path}")
        input("\n\nPress any key to exit.")
    except Exception as e:
        # Top-level boundary: show the failure and keep the console open so
        # the user can read it.
        print(f"The program failed to start do the the following exception:\n{e}")
        input(f"Please make note of the error before closing so that you can report it.")

Binary file not shown.
Loading…
Cancel
Save