You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
XMLRuleParser/parse_xml.py

128 lines
5.1 KiB

try:
import re
from pandas import DataFrame
from typing import Union
from logging import debug as dbg, getLogger
import win32clipboard
# Patterns are compiled once instead of being rebuilt for every scenario.
# "[\s\S]" matches any character including newlines; each "(?!...)" lookahead
# keeps a body from running past the next tag that starts with that prefix.
_SCENARIO_RE = re.compile(r'<Scenario Seq="(\d{1,3})">(?:(?!<Sc)[\s\S])*</S')
_CONDITION_RE = re.compile(
    r'<Condition Id="(\w+)" Group="\w+" CompareTo="(?:Value|Range)">'
    r'((?:(?!</C)[\s\S])*)</Condition>'
)
_UPDATE_RE = re.compile(
    r'<UpdateField Id="(\w+)" Group="\w+" UIRequired="\d+" UIDisabled="\d+" '
    r'ForceUpdate="\d+">\n?(?:(?!</U)[\s\S])*</Up'
)
# Text that shares a line with the end of the opening tag (CompareTo="...e">).
_INLINE_VALUE_RE = re.compile(r'e">.*<')
# "[\w.]+" accepts the same text as the former "(\w+|\.)+" without the nested
# quantifier, which could backtrack exponentially on long non-matching input.
_UPDATE_VALUE_RE = re.compile(r'>([\w.]+)</Value>')


def _condition_value(condition: str, body: str) -> str:
    """Return the value carried by one matched ``<Condition>`` element.

    Args:
        condition: Full text of the element, opening tag through closing tag.
        body: Text between the opening tag and ``</Condition>``.

    Returns:
        If a ``<`` follows on the same line as the opening tag, that line's
        text up to its last ``<`` (not stripped). Otherwise the whole body
        with surrounding whitespace removed.
    """
    inline = _INLINE_VALUE_RE.search(condition)
    if inline is not None:
        # Drop the leading 'e">' and the trailing '<'.
        return inline.group()[3:-1]
    # Multi-line body: use the captured body so an Id that happens to end in
    # 'e' cannot shift the anchor and no leading character is sliced away.
    return body.strip()


def _scenario_row(seq: str, scenario: str) -> dict:
    """Collect the field values of one ``<Scenario>`` into a dict.

    Args:
        seq: Digits of the scenario's ``Seq`` attribute.
        scenario: Text matched for the scenario.

    Returns:
        ``{"SEQ": int(seq), <field id>: <value>, ...}`` with conditions first
        and update fields second; an update overwrites a condition that has
        the same Id.

    Raises:
        ValueError: An ``UpdateField`` has no ``<Value>`` consisting only of
            word characters and dots.
    """
    row = {"SEQ": int(seq)}

    conditions = list(_CONDITION_RE.finditer(scenario))
    updates = list(_UPDATE_RE.finditer(scenario))
    dbg("\n\nSenario Group: %s", scenario)
    dbg("%s | Updates: %s", seq, updates)
    dbg("conditions:\n%s", conditions)

    for cond in conditions:
        field_id = cond.group(1)
        value = _condition_value(cond.group(), cond.group(2))
        dbg("SEQ: %s | %s", seq, field_id)
        dbg("SEQ: %s | %s", seq, value)
        row[field_id] = value

    for upd in updates:
        text = upd.group()
        field_id = upd.group(1)
        dbg("%s | Update: %s", seq, text)
        value_match = _UPDATE_VALUE_RE.search(text)
        if value_match is None:
            raise ValueError(
                f"Scenario {seq}: UpdateField '{field_id}' has no <Value> "
                "made only of word characters or dots"
            )
        value = value_match.group(1)
        dbg("%s UPDATE | %s : %s", seq, field_id, value)
        row[field_id] = value

    return row


def create_table(xmlStr: str) -> Union[DataFrame, Exception]:
    """Build a table with one row per ``<Scenario>`` found in *xmlStr*.

    Every ``<Condition>`` and ``<UpdateField>`` Id becomes a column; a scenario
    that lacks a given field gets an empty string in that column. The result
    is indexed by the scenario's ``Seq`` number.

    Args:
        xmlStr: Text containing ``<Scenario Seq="N">`` elements.

    Returns:
        The populated ``DataFrame`` on success. On failure the exception is
        *returned*, not raised, so the caller can display it:
        ``Exception("No data found...")`` when the table has no rows or no
        field columns, otherwise whatever error parsing or table construction
        produced.
    """
    try:
        columns = {"SEQ": []}
        # rows_before == number of scenarios already merged into `columns`.
        for rows_before, match in enumerate(_SCENARIO_RE.finditer(xmlStr)):
            seq = match.group(1)
            dbg("\nSeq: %s", seq)
            row = _scenario_row(seq, match.group())

            # Extend every known column, blank where this scenario has no value.
            for key, cells in columns.items():
                cells.append(row.get(key, ""))

            # A field seen for the first time is back-filled with one blank per
            # earlier row. Counting rows (rather than trusting Seq to run
            # 1, 2, 3, ...) keeps all columns the same length when Seq numbers
            # skip values or do not start at 1.
            for key, value in row.items():
                if key not in columns:
                    columns[key] = [""] * rows_before + [value]
                    dbg("New key: %s | %s", key, columns[key])
            dbg("%s | %s\n", seq, columns)

        dbg(columns)
        if getLogger().level == 10:  # 10 == logging.DEBUG
            for key, cells in columns.items():
                dbg("%s : %s", key, len(cells))

        table = DataFrame(columns)
        table.set_index("SEQ", inplace=True)
        dbg(table)
        if table.empty:
            # Exception() accepts no keyword arguments; passing color=/effect=
            # raised TypeError and hid this message from the user.
            return Exception("No data found...")
        return table
    except Exception as e:  # contract: hand the error back to the caller
        return e
def process_clipboard() -> str:
    """Show the clipboard contents and loop until the user accepts them.

    Each pass reads the clipboard, prints it, and asks for confirmation. Any
    answer containing "y" or "1" (case-insensitive) accepts the text. Typing
    "debug" sets the root logger to level 10 (DEBUG); because it contains
    neither "y" nor "1", the prompt is then shown again.

    Returns:
        The clipboard data the user confirmed, or the string "None" if the
        clipboard could not be read on the confirmed pass.
    """
    confirmed = False
    while not confirmed:
        win32clipboard.OpenClipboard()
        try:
            xml = win32clipboard.GetClipboardData()
        except Exception:
            # Best effort: unreadable clipboard content is shown as "None".
            # Catching Exception (not a bare except) lets Ctrl+C propagate.
            xml = "None"
        finally:
            # Always release the clipboard, even if reading is interrupted.
            win32clipboard.CloseClipboard()

        print("\n\nYour current clipboard is as follows:")
        print(xml)
        yn = input("\nIs this the XML you'd like to parse? (y/n)\n >")
        if yn.lower() == "debug":
            getLogger().setLevel(10)
            print("\nYou have now entered debug mode...")

        confirmed = re.search(r"(?i)y|1", yn) is not None
        if not confirmed:
            input("Please copy the xml then press enter...")
    return xml
# Driver: keep asking for clipboard XML until create_table() yields a
# DataFrame, then show it and copy it back to the clipboard.
table: Union[DataFrame, Exception, None] = None
while not isinstance(table, DataFrame):
    xml = process_clipboard()
    table = create_table(xml)
    if not isinstance(table, DataFrame):
        # create_table() hands back the exception instead of raising it.
        print(f"\n\nENCOUNTERED ERROR!:\n{table}\n")
        input("Please try again...")
print("Table sample:")
print(table)
table.to_clipboard()
input("This table is now in your clipboard to paste into excel.")
except Exception as e:
print(f"The program failed to start do the the following exception:\n{e}")
input(f"Please make note of the error before closing so that you can report it.")