""" AdmitParser --- Specialized parser for ADMIT XML files.

    This module defines the AdmitParser class.
"""
from xml import sax
import copy
import numpy as np
from admit.util.AdmitLogging import AdmitLogging as logging
from admit.xmlio.DTDParser import DTDParser
import admit.util.bdp_types as bt
import admit.util.utils as utils
import admit.Summary as Summary
from admit.AT import AT
import admit.FlowManager as fm
import admit.util.admit_ast as aast
class AdmitParser(sax.handler.ContentHandler):
    """ Specialized XML parser for admit and bdp parsing.

        NOTE(review): this class was rebuilt from a listing that had lost its
        indentation and every line consisting of a single word (``try:``,
        ``else:``, ``pass``, ``return``, docstring terminators). Statements
        tagged NOTE(review) below were re-inserted by inference and should be
        confirmed against the upstream file before relying on them.

        Parameters
        ----------
        basedir : str
            The base directory being used to read in the ADMIT files.

        xmlFile : str
            The file to parse.
            Default: "admit.xml".

        Attributes
        ----------
        xmlFile : str
            The file to parse.

        basedir : str
            The base directory being used to read in the ADMIT files.

        dtd : DTDParser
            Instance of the DTDParser used for validation.

        AT : List
            List for keeping track of the AT's found.

        curAT : AT
            Instance of the current AT being reconstructed.

        BDP : BDP
            Instance of the current BDP being reconstructed.

        type : various
            Empty instance of the type of the node being parsed (the type
            string itself while a BDP node is open).

        name : str
            Name of the current node being parsed.

        ndarr : List
            List of elements that are numpy arrays.

        sets : List
            List of elements that are sets.

        userData : Dict
            Dictionary for the user data.

        summaryData : Dict
            Dictionary for the summaryData.

        summaryEntry : SummaryEntry
            Current instance of the SummaryEntry being reconstructed.

        flowmanager : FlowManager
            Local copy of the FlowManager for reconstruction.

        projmanager : Dict
            Project ID to base directory map.

        Util : various classes
            The current utility class being reconstructed.

        MultiImage : MultiImage class instance
            The current MultiImage being reconstructed.

        inflow : Boolean
            Whether the parser is currently reconstructing the flow manager.

        inAdmit : Boolean
            Whether the parser is currently reconstructing the ADMIT base
            class.

        inUtil : Boolean
            Whether the parser is currently reconstructing a utility class.

        inMulti : Boolean
            Whether the parser is currently reconstructing a MultiImage.

        inKeys : Boolean
            Whether the parser is currently reconstructing the keys of an AT.

        inSummary : Boolean
            Whether the parser is currently reconstructing the summary data.

        inSummaryEntry : Boolean
            Whether the parser is currently reconstructing a SummaryEntry.

        utilType : str
            The type of the current utility class being reconstructed.

        utilName : str
            The name of the current utility class being reconstructed.

        multiName : str
            The name of the MultiImage currently being reconstructed.

        summaryName : str
            The name of the summary being reconstructed.

        summaryEntryName : str
            The name of the SummaryEntry being reconstructed.

        admit : ADMIT
            The base class being reconstructed.

        metadataName : str
            The name of the current metadata node.

        inBDP : Boolean
            Whether the parser is currently reconstructing a BDP.

        inAT : Boolean
            Whether the parser is currently reconstructing an AT.

        tempdata : str
            String for holding large data that need to be reconstructed.

        flowdata : str
            String accumulating the text of the flow manager node.
    """
    # Message emitted whenever a node in the file has no matching data member.
    _MEMBER_MSG = ("Data member %s is not a member of %s. This may be due to a "
                   "version mismatch between the data and your software, "
                   "attempting to continue.")

    def __init__(self, basedir="", xmlFile="admit.xml"):
        self.xmlFile = xmlFile
        self.basedir = basedir
        # initialize the parent class
        # NOTE(review): the original comment had no statement after it in the
        # recovered listing; the call below is the presumed dropped line.
        sax.handler.ContentHandler.__init__(self)
        # get the dtd info
        self.dtd = DTDParser(xmlFile)
        self.AT = []
        # initialize all of the variables that track everything as it is
        # being reconstructed
        self.curAT = None
        self.BDP = None
        self.type = None
        self.name = None
        self.ndarr = []
        self.sets = []
        self.userData = None
        self.summaryData = None
        self.summaryEntry = None
        self.flowmanager = None
        self.projmanager = None
        self.inflow = False
        self.inAdmit = False
        self.inKeys = False
        self.inSummary = False
        self.inSummaryEntry = False
        self.summaryName = None
        self.summaryEntryName = None
        self.metadataName = None
        self.admit = None
        self.Util = None
        self.MultiImage = None
        self.inUtil = False
        self.inMulti = False
        self.utilType = ""
        self.utilName = ""
        self.multiName = ""
        self.inBDP = False
        self.inAT = False
        self.tempdata = ""
        self.flowdata = ""

    def getBDP(self):
        """ Return the current BDP

            Parameters
            ----------
            None

            Returns
            -------
            A deep copy of the current BDP
        """
        # return the generated BDP
        return copy.deepcopy(self.BDP)

    def setadmit(self, admit):
        """ Set the base class to the given class

            Parameters
            ----------
            admit : ADMIT
                The class to set the base to

            Returns
            -------
            None
        """
        self.admit = admit

    def getflowmanager(self):
        """ Returns the FlowManager instance

            Parameters
            ----------
            None

            Returns
            -------
            The current FlowManager instance
        """
        return self.flowmanager

    def getAT(self):
        """ Return the list of reconstructed AT's

            Parameters
            ----------
            None

            Returns
            -------
            List of AT's
        """
        # return the generated AT list
        return self.AT

    def _target(self, name=None, entry=False, summary=False):
        """ Pick the object that the node being parsed belongs to.

            Parameters
            ----------
            name : str
                Node name; "projmanager" selects the parser itself.

            entry : Boolean
                Whether an open SummaryEntry is a candidate.

            summary : Boolean
                Whether the open summary data is a candidate.

            Returns
            -------
            The utility class, BDP, AT, SummaryEntry, summary data, parser or
            ADMIT base class, tested in that order
        """
        if self.inUtil:
            return self.Util
        if self.inBDP:
            return self.BDP
        if self.inAT:
            return self.curAT
        if entry and self.inSummaryEntry:
            return self.summaryEntry
        if summary and self.inSummary:
            return self.summaryData
        if name == "projmanager":
            return self
        return self.admit

    def _store(self, target, name, value):
        """ Set a value on target, logging (not raising) on AttributeError.

            Parameters
            ----------
            target : object
                The class to which the value is being set

            name : str
                The name of the attribute or key to set

            value : various
                The value to set

            Returns
            -------
            None
        """
        try:
            self.setattr(target, name, value)
        except AttributeError:
            logging.info(self._MEMBER_MSG % (self.name, str(type(target))))

    def _startContainer(self, name, attrib, empty):
        """ Common start-of-node handling for dict, list, set and tuple nodes.

            Records the node type, reads the "ndarray" and "set" attributes
            (lists of members to convert in endElement) and validates both
            attributes against the dtd.

            Parameters
            ----------
            name : str
                The name of the current node

            attrib : Dict
                The attributes of the current node

            empty : dict, list, set or tuple
                Empty instance used to mark the node type

            Returns
            -------
            None
        """
        self.type = empty
        self.ndarr = aast.literal_eval(attrib.get("ndarray"))
        self.sets = aast.literal_eval(attrib.get("set"))
        tname = name
        if self.inAT:
            # inside an AT the dtd entries are qualified by the AT type
            tname += "_" + self.curAT.show().upper()
        self.dtd.check(tname, "ndarray", bt.STRING)
        self.dtd.check(tname, "set", bt.STRING)

    def startElement(self, name, attrib):
        """ Method called whenever a new element is found. Dtd validation is
            done for each node and attribute. This method is only called by the
            SAX parser itself.

            Parameters
            ----------
            name : str
                The name of the current node

            attrib : Dict
                Dictionary of any attributes and their values that was found by
                the parser

            Returns
            -------
            None
        """
        # get the type of the data
        self.tempdata = ""
        temp = str(attrib.get("type"))
        # figure out where the node belongs
        # NOTE(review): the "pass"/"else" pair and the "else" between the two
        # checks are inferred; the recovered listing had dropped those lines.
        if bt.ADMIT == name or temp == bt.AT:
            pass
        else:
            if self.inAT:
                self.dtd.check(name + "_" + self.curAT.show().upper(), "type",
                               temp)
            else:
                self.dtd.check(name, "type", temp)
        if name == bt.FLOWMANAGER:
            self.inflow = True
        self.name = name
        if self.inBDP:
            self.dtd.check(name, "type", temp)
        if name == "_keys":
            self.inKeys = True
        # find out what type of data the node contains
        if temp == bt.STRING:
            self.type = str()
        elif temp == bt.BOOL:
            self.type = bool()
        elif temp == bt.FLOAT:
            self.type = float()
        elif temp == bt.LONG:
            self.type = long()
        elif temp == bt.NONE:
            # handle the utility, BDP and AT classes
            target = self._target()
            try:
                setattr(target, self.name, None)
            except AttributeError:
                print(self._MEMBER_MSG % (self.name, str(type(target))))
        elif temp == bt.DICT:
            self._startContainer(name, attrib, dict())
        elif temp == bt.LIST:
            self._startContainer(name, attrib, list())
        elif temp == bt.SET:
            self._startContainer(name, attrib, set())
        elif temp == bt.INT:
            self.type = int()
        elif temp == bt.TUPLE:
            self._startContainer(name, attrib, tuple())
        elif temp == bt.NDARRAY:
            self.type = np.ndarray([])
        elif name == bt.ADMIT:
            self.inAdmit = True
        elif temp.title() in bt.UTIL_LIST or temp.upper() == bt.MULTIIMAGE:
            self.inUtil = True
            if temp.upper() == bt.MULTIIMAGE:
                self.multiName = name
                self.MultiImage = utils.getClass("util", "MultiImage")
                self.inMulti = True
            else:
                # NOTE(review): "else" inferred
                self.Util = utils.getClass("util", temp.title())
                self.utilName = name
                self.utilType = temp
        elif temp == bt.AT:
            if self.flowmanager is None:
                raise Exception("FlowManager not initialized, xml in wrong order")
            self.inAT = True
            try:
                self.curAT = utils.getClass("at", name)
            except Exception as e:
                raise Exception("Could not create class of type %s because %s" % (name, str(e)))
        elif name == bt.BDP:
            self.type = temp
            self.inBDP = True
            try:
                self.BDP = utils.getClass("bdp", temp)
            except Exception as e:
                raise Exception("Could not create class of type %s because %s" % (temp, str(e)))
        elif temp == bt.SUMMARY:
            self.summaryData = Summary.Summary()
            self.inSummary = True
            self.summaryName = name
        elif temp == bt.SUMMARYENTRY:
            self.summaryEntry = Summary.SummaryEntry()
            self.inSummaryEntry = True
            self.summaryEntryName = name
        elif name == "metadata" and self.inSummary:
            self.metadataName = str(attrib.get("name"))
            self.dtd.check(name, "name", bt.STRING)
            self.summaryData._metadata[self.metadataName] = []

    def endElement(self, name):
        """ Method called whenever the end of an xml element is reached. This
            method is only called by the SAX parser itself.

            Parameters
            ----------
            name : str
                The name of the node that just ended

            Returns
            -------
            None
        """
        # reset the tracking stuff, add BDP's to AT's, AT's to the flowmanager
        # reconstruct any nodes that spanned multiple lines
        if name == self.utilName:
            # add the utility classes to the appropriate parent class
            # Images always get added to MultiImages
            if self.inMulti:
                self.MultiImage.addimage(copy.deepcopy(self.Util), self.Util.name)
            elif self.inBDP:
                setattr(self.BDP, self.utilName, copy.deepcopy(self.Util))
            elif self.inAT:
                setattr(self.curAT, self.utilName, copy.deepcopy(self.Util))
            self.inUtil = False
            self.utilName = ""
        elif name == self.multiName:
            if self.inBDP:
                setattr(self.BDP, self.multiName, copy.deepcopy(self.MultiImage))
            elif self.inAT:
                setattr(self.curAT, self.multiName, copy.deepcopy(self.MultiImage))
            # reset the name this branch is keyed on (the original assigned to
            # an otherwise unused "multiImageName", leaving multiName stale)
            self.multiName = ""
            self.inMulti = False
            self.inUtil = False
        elif name == bt.BDP:
            # one last validation run
            self.BDP._baseDir = self.basedir
            if not self.dtd.checkAll():
                logging.info("Some required nodes missing from xml file, attempting to continue anyway.")
        elif name == bt.FLOWMANAGER:
            temp = aast.literal_eval(self.flowdata)
            for key in ["depsmap", "varimap"]:
                if key in temp:
                    # SECURITY: eval() executes whatever expression the file
                    # contains here; only parse files from a trusted source.
                    temp[key] = eval(temp[key])
            self.flowmanager = fm.FlowManager(**temp)
            self.inflow = False
        elif isinstance(self.type, str):
            # string nodes are accumulated by characters() and stored here
            target = self._target(name, entry=True)
            self.setattr(target, name, self.tempdata)
            self.tempdata = ""
        elif isinstance(self.type, (list, dict, tuple, set)):
            temp = aast.literal_eval(self.tempdata)
            target = self._target(name, entry=True)
            # convert the members flagged in the node attributes
            for i in self.ndarr:
                temp[i] = np.array(temp[i], dtype=object)
            for i in self.sets:
                temp[i] = set(temp[i])
            if isinstance(self.type, tuple):
                temp = tuple(temp)
            elif isinstance(self.type, set):
                temp = set(temp)
            # NOTE(review): the exact start of the original "try:" is unknown;
            # only the store is guarded here.
            self._store(target, name, temp)
        elif isinstance(self.type, np.ndarray):
            temp = aast.literal_eval(self.tempdata)
            target = self._target()
            self._store(target, self.name, np.array(temp, dtype=object))
        elif self.inAT and name == self.curAT.show():
            self.inAdmit = False
            # one last validation run
            self.curAT._bdp_in = [None] * len(self.curAT._bdp_in_map)
            self.curAT._bdp_out = [None] * len(self.curAT._bdp_out_map)
            self.curAT._baseDir = self.basedir
            at = copy.deepcopy(self.curAT)
            self.flowmanager[at._taskid] = at
            self.curAT = None
            self.inAT = False
        elif name == bt.ADMIT:
            self.inAdmit = False
            if not self.dtd.checkAll():
                print("Some required nodes missing from admit.xml file, attempting to continue anyway")
        elif name == "_keys":
            self.inKeys = False
        elif name == self.summaryEntryName:
            self.summaryEntryName = None
            self.inSummaryEntry = False
        elif name == self.summaryName:
            self.inSummary = False
        elif name == self.metadataName:
            self.metadataName = None
        # ndarr stays a list (the original finished with "self.ndarr = False",
        # which left a non-iterable in a field that is iterated above)
        self.ndarr = []
        self.sets = []
        self.type = None
        self.name = None

    def setattr(self, target, name, value):
        """ Method to set attributes in a class

            Parameters
            ----------
            target : object
                The class to which the parameter is being set

            name : str
                The name of the attribute to set

            value : various
                The value to set the attribute to

            Returns
            -------
            None
        """
        # if we are in the keys then treat them specially
        if self.inKeys:
            target.setkey(name, value, True)
        else:
            # NOTE(review): "else" inferred
            setattr(target, name, value)

    def getattr(self, target, name):
        """ Method to get the value of a specific data member from the given class

            Parameters
            ----------
            target : object
                The class from which the data value will be obtained

            name : str
                The name of the data member whose value will be obtained

            Returns
            -------
            Various, the value of the requested data member, None if it does not exist
        """
        # NOTE(review): the branch structure was lost in the recovered listing;
        # this reading (no lookup while inside the keys) is an assumption.
        if self.inKeys:
            return None
        if hasattr(target, name):
            return target.get(name)
        return None

    def characters(self, ch):
        """ Method called whenever characters are detected in an xml node
            This method does some dtd validation. This
            method is only called by the SAX parser itself.

            Parameters
            ----------
            ch : unicode characters

            Returns
            -------
            None
        """
        char = str(ch).strip()
        if not char:
            # nothing but whitespace between nodes
            return
        # determine which class the data are getting written to
        target = self._target(entry=True, summary=True)
        # a list or dictionary has to be decoded
        if isinstance(self.type, (list, dict, tuple, set, np.ndarray, str)):
            # text may arrive in several chunks; it is decoded in endElement
            # NOTE(review): "else" inferred
            if self.inflow:
                self.flowdata += char
            else:
                self.tempdata += char
        else:
            # check the version
            # NOTE(review): both "else" lines of this branch are inferred; it is
            # unconfirmed whether _version was also stored by the original.
            if self.name == "_version":
                ver = self.getattr(target, self.name)
                vercheck = utils.compareversions(ver, str(char))
                if vercheck < 0:    # newer read in
                    logging.warning("Version mismatch for %s, data are a newer version than current software, attempting to continue." % target.getkey("_type"))
                elif vercheck > 0:  # older read in
                    logging.warning("Version mismatch for %s, data are an older version than current software, attempting to continue." % target.getkey("_type"))
            else:
                self._store(target, self.name, self.getData(char))

    def close(self):
        """ Method to close out the reader, called only by the sax parser

            Parameters
            ----------
            None

            Returns
            -------
            None
        """
        pass

    def getData(self, item):
        """ Method used to convert string to designated type.

            Parameters
            ----------
            item : str
                The string to convert

            Returns
            -------
            Various types, based on the expected type of the xml node; None
            when the node type is not one of bool, int, long, float or str
        """
        # bool is tested first because bool instances are also int instances
        if isinstance(self.type, bool):
            return bool(int(item))
        if isinstance(self.type, int):
            return int(item)
        if isinstance(self.type, long):
            return long(item)
        if isinstance(self.type, float):
            return float(item)
        if isinstance(self.type, str):
            return str(item)