Source code for admit.xmlio.AdmitParser

"""
    AdmitParser --- Specialized parser for ADMIT XML files.
    -------------------------------------------------------

    This module defines the AdmitParser class.
"""
from xml import sax
import copy
import numpy as np

from admit.util.AdmitLogging import AdmitLogging as logging
from admit.xmlio.DTDParser import DTDParser
import admit.util.bdp_types as bt
import admit.util.utils as utils
import admit.Summary as Summary
from admit.AT import AT
import admit.FlowManager as fm
import admit.util.admit_ast as aast

class AdmitParser(sax.handler.ContentHandler):
    """ Specialized XML parser for admit and bdp parsing.

        The parser is event driven: the SAX machinery calls startElement,
        characters and endElement, and this class uses a set of state flags
        (inAT, inBDP, inUtil, ...) to decide which object the data of the
        current node belong to.

        Parameters
        ----------
        basedir : str
            The base directory being used to read in the ADMIT files.

        xmlFile : str
            The file to parse.
            Default: "admit.xml".

        Attributes
        ----------
        xmlFile : str
            The file to parse.

        basedir : str
            The base directory being used to read in the ADMIT files.

        dtd : DTDParser
            Instance of the DTDParser used to validate nodes and attributes.

        AT : List
            List for keeping track of the AT's found.

        curAT : AT
            Instance of the current AT being reconstructed.

        BDP : BDP
            Instance of the current BDP being reconstructed.

        type : various
            Empty instance of the data type of the current node (e.g. str(),
            list()), used for isinstance checks; for a BDP node it holds the
            BDP type string.

        name : str
            Name of the current node being parsed.

        ndarr : List
            List of elements that are numpy arrays.

        sets : List
            List of elements that are sets.

        userData : Dict
            Dictionary for the user data.

        summaryData : Dict
            Dictionary for the summaryData.

        summaryEntry : SummaryEntry
            Current instance of the SummaryEntry being reconstructed.

        flowmanager : FlowManager
            Local copy of the FlowManager for reconstruction.

        projmanager : Dict
            Project ID to base directory map.

        Util : various classes
            The current utility class being reconstructed.

        MultiImage : MultiImage class instance
            The current MultiImage being reconstructed.

        inflow : Boolean
            Whether the parser is currently reconstructing the flow manager.

        inAdmit : Boolean
            Whether the parser is currently reconstructing the ADMIT base
            class.

        inUtil : Boolean
            Whether the parser is currently reconstructing a utility class.

        inMulti : Boolean
            Whether the parser is currently reconstructing a MultiImage.

        inKeys : Boolean
            Whether the parser is currently reconstructing the keys of an AT.

        inSummary : Boolean
            Whether the parser is currently reconstructing the summary data.

        inSummaryEntry : Boolean
            Whether the parser is currently reconstructing a SummaryEntry.

        utilType : str
            The type of the current utility class being reconstructed.

        utilName : str
            The name of the current utility class being reconstructed.

        multiName : str
            The name of the MultiImage currently being reconstructed.

        summaryName : str
            The name of the summary being reconstructed.

        summaryEntryName : str
            The name of the SummaryEntry being reconstructed.

        admit : ADMIT
            The base class being reconstructed.

        metadataName : str
            The name of the current metadata node.

        inBDP : Boolean
            Whether the parser is currently reconstructing a BDP.

        inAT : Boolean
            Whether the parser is currently reconstructing an AT.

        tempdata : str
            String for holding large data that need to be reconstructed.

        flowdata : str
            String accumulating the character data found while inside the
            flow manager node.
    """

    def __init__(self, basedir="", xmlFile="admit.xml"):
        self.xmlFile = xmlFile
        self.basedir = basedir
        # initialize the parent class
        sax.handler.ContentHandler.__init__(self)
        # get the dtd info
        self.dtd = DTDParser(xmlFile)
        self.dtd.parse()
        self.AT = []
        # initialize all of the variables that track everything as it is
        # being reconstructed
        self.curAT = None
        self.BDP = None
        self.type = None
        self.name = None
        self.ndarr = []
        self.sets = []
        self.userData = None
        self.summaryData = None
        self.summaryEntry = None
        self.flowmanager = None
        self.projmanager = None
        self.inflow = False
        self.inAdmit = False
        self.inKeys = False
        self.inSummary = False
        self.inSummaryEntry = False
        self.summaryName = None
        self.summaryEntryName = None
        self.metadataName = None
        self.admit = None
        self.Util = None
        self.MultiImage = None
        self.inUtil = False
        self.inMulti = False
        self.utilType = ""
        self.utilName = ""
        self.multiName = ""
        self.inBDP = False
        self.inAT = False
        self.tempdata = ""
        self.flowdata = ""

    def getBDP(self):
        """ Return the current BDP

            Parameters
            ----------
            None

            Returns
            -------
            A deep copy of the current BDP
        """
        # hand out a copy so the caller cannot disturb the parser state
        return copy.deepcopy(self.BDP)

    def setadmit(self, admit):
        """ Set the base class to the given class

            Parameters
            ----------
            admit : ADMIT
                The class to set the base to

            Returns
            -------
            None
        """
        self.admit = admit

    def getflowmanager(self):
        """ Returns the FlowManager instance

            Parameters
            ----------
            None

            Returns
            -------
            The current FlowManager instance
        """
        return self.flowmanager

    def getAT(self):
        """ Return the list of reconstructed AT's

            Parameters
            ----------
            None

            Returns
            -------
            List of AT's
        """
        # return the generated AT list
        return self.AT

    def _getTarget(self, name=None, useSummaryEntry=False, useSummary=False):
        """ Private helper that picks the object currently being filled in.

            Precedence is utility class, BDP, AT, then (when requested) the
            SummaryEntry and the Summary, then the parser itself for the
            "projmanager" node, and finally the ADMIT base class.

            Parameters
            ----------
            name : str
                Name of the node; only "projmanager" is treated specially.
                Default: None.

            useSummaryEntry : Boolean
                Whether an open SummaryEntry may be selected.
                Default: False.

            useSummary : Boolean
                Whether an open Summary may be selected.
                Default: False.

            Returns
            -------
            The object that attributes should be set on
        """
        if self.inUtil:
            return self.Util
        if self.inBDP:
            return self.BDP
        if self.inAT:
            return self.curAT
        if useSummaryEntry and self.inSummaryEntry:
            return self.summaryEntry
        if useSummary and self.inSummary:
            return self.summaryData
        if name == "projmanager":
            return self
        return self.admit

    def _reportMissingMember(self, target):
        """ Private helper that logs that the current node does not map to a
            data member of the target.

            Parameters
            ----------
            target : object
                The object on which setting the attribute failed

            Returns
            -------
            None
        """
        logging.info("Data member %s is not a member of %s. This may be due "
                     "to a version mismatch between the data and your "
                     "software, attempting to continue."
                     % (self.name, str(type(target))))

    def _startContainer(self, name, attrib, empty):
        """ Private helper holding the start-of-node handling shared by the
            dict, list, set and tuple node types.

            Parameters
            ----------
            name : str
                The name of the current node

            attrib : Dict
                The attributes of the node; "ndarray" and "set" hold the
                elements that must be converted back when the node ends

            empty : dict, list, set or tuple
                Empty instance recorded as the type of the node

            Returns
            -------
            None
        """
        self.type = empty
        self.ndarr = aast.literal_eval(attrib.get("ndarray"))
        self.sets = aast.literal_eval(attrib.get("set"))
        tname = name
        if self.inAT:
            # inside an AT the dtd entries are keyed by node name and AT type
            tname += "_" + self.curAT.show().upper()
        self.dtd.check(tname, "ndarray", bt.STRING)
        self.dtd.check(tname, "set", bt.STRING)

    def startElement(self, name, attrib):
        """ Method called whenever a new element is found. Dtd validation is
            done for each node and attribute. This method is only called by
            the SAX parser itself.

            Parameters
            ----------
            name : str
                The name of the current node

            attrib : Dict
                Dictionary of any attributes and their values that was found
                by the parser

            Returns
            -------
            None

            Raises
            ------
            Exception
                If an AT node is found before the flow manager, or if the
                class for an AT or BDP node cannot be created.
        """
        # get the type of the data
        self.tempdata = ""
        temp = str(attrib.get("type"))
        # figure out where the node belongs
        if bt.ADMIT == name or temp == bt.AT:
            self.dtd.check(name.upper())
        elif self.inAT:
            self.dtd.check(name + "_" + self.curAT.show().upper(), "type", temp)
        else:
            self.dtd.check(name, "type", temp)
        if name == bt.FLOWMANAGER:
            self.inflow = True
        self.name = name
        if self.inBDP:
            self.dtd.check(name, "type", temp)
        if name == "_keys":
            self.inKeys = True
            return
        # find out what type of data the node contains
        if temp == bt.STRING:
            self.type = str()
        elif temp == bt.BOOL:
            self.type = bool()
        elif temp == bt.FLOAT:
            self.type = float()
        elif temp == bt.LONG:
            # long only exists in Python 2, which this module is written for
            self.type = long()
        elif temp == bt.NONE:
            # a None value carries no character data, so set it right away
            target = self._getTarget()
            try:
                setattr(target, self.name, None)
            except AttributeError:
                self._reportMissingMember(target)
        elif temp == bt.DICT:
            self._startContainer(name, attrib, dict())
        elif temp == bt.LIST:
            self._startContainer(name, attrib, list())
        elif temp == bt.SET:
            self._startContainer(name, attrib, set())
        elif temp == bt.INT:
            self.type = int()
        elif temp == bt.TUPLE:
            self._startContainer(name, attrib, tuple())
        elif temp == bt.NDARRAY:
            self.type = np.ndarray([])
        elif name == bt.ADMIT:
            self.inAdmit = True
        elif temp.title() in bt.UTIL_LIST or temp.upper() == bt.MULTIIMAGE:
            self.inUtil = True
            if temp.upper() == bt.MULTIIMAGE:
                self.multiName = name
                self.MultiImage = utils.getClass("util", "MultiImage")
                self.inMulti = True
            else:
                self.Util = utils.getClass("util", temp.title())
                self.utilName = name
                self.utilType = temp
        elif temp == bt.AT:
            if self.flowmanager is None:
                raise Exception("FlowManager not initialized, xml in wrong order")
            self.inAT = True
            try:
                self.curAT = utils.getClass("at", name)
            except Exception as e:
                raise Exception("Could not create class of type %s because %s" % (name, str(e)))
        elif name == bt.BDP:
            self.type = temp
            self.inBDP = True
            try:
                self.BDP = utils.getClass("bdp", temp)
            except Exception as e:
                raise Exception("Could not create class of type %s because %s" % (temp, str(e)))
        elif temp == bt.SUMMARY:
            self.summaryData = Summary.Summary()
            self.inSummary = True
            self.summaryName = name
        elif temp == bt.SUMMARYENTRY:
            self.summaryEntry = Summary.SummaryEntry()
            self.inSummaryEntry = True
            self.summaryEntryName = name
        elif name == "metadata" and self.inSummary:
            self.metadataName = str(attrib.get("name"))
            self.dtd.check(name, "name", bt.STRING)
            self.summaryData._metadata[self.metadataName] = []

    def endElement(self, name):
        """ Method called whenever the end of an xml element is reached. This
            method is only called by the SAX parser itself.

            Parameters
            ----------
            name : str
                The name of the node that just ended

            Returns
            -------
            None
        """
        # reset the tracking stuff, add BDP's to AT's, AT's to the flowmanager
        # reconstruct any nodes that spanned multiple lines
        if name == self.utilName:
            # add the utility classes to the appropriate parent class
            # Images always get added to MultiImages
            if self.inMulti:
                self.MultiImage.addimage(copy.deepcopy(self.Util), self.Util.name)
            elif self.inBDP:
                setattr(self.BDP, self.utilName, copy.deepcopy(self.Util))
            elif self.inAT:
                setattr(self.curAT, self.utilName, copy.deepcopy(self.Util))
            self.inUtil = False
            self.utilName = ""
        elif name == self.multiName:
            if self.inBDP:
                setattr(self.BDP, self.multiName, copy.deepcopy(self.MultiImage))
            elif self.inAT:
                setattr(self.curAT, self.multiName, copy.deepcopy(self.MultiImage))
            # forget the name so that a later node with the same name is not
            # mistaken for the end of this MultiImage
            self.multiName = ""
            self.inMulti = False
            self.inUtil = False
        elif name == bt.BDP:
            # one last validation run
            self.BDP._baseDir = self.basedir
            if not self.dtd.checkAll():
                logging.info("Some required nodes missing from xml file, attempting to continue anyway.")
        elif name == bt.FLOWMANAGER:
            temp = aast.literal_eval(self.flowdata)
            for key in ["depsmap", "varimap"]:
                if key in temp:
                    # NOTE(review): eval() executes text read from the xml
                    # file, so only trusted files should be parsed
                    temp[key] = eval(temp[key])
            self.flowmanager = fm.FlowManager(**temp)
            self.inflow = False
        elif isinstance(self.type, str):
            # string data were accumulated in tempdata by characters()
            target = self._getTarget(name, useSummaryEntry=True)
            self.setattr(target, name, self.tempdata)
            self.tempdata = ""
        elif isinstance(self.type, (list, dict, tuple, set)):
            temp = aast.literal_eval(self.tempdata)
            target = self._getTarget(name, useSummaryEntry=True)
            # restore the members that were numpy arrays or sets
            for i in self.ndarr:
                temp[i] = np.array(temp[i], dtype=object)
            for i in self.sets:
                temp[i] = set(temp[i])
            if isinstance(self.type, tuple):
                temp = tuple(temp)
            elif isinstance(self.type, set):
                temp = set(temp)
            try:
                self.setattr(target, name, temp)
            except AttributeError:
                self._reportMissingMember(target)
        elif isinstance(self.type, np.ndarray):
            temp = aast.literal_eval(self.tempdata)
            target = self._getTarget()
            try:
                self.setattr(target, self.name, np.array(temp, dtype=object))
            except AttributeError:
                self._reportMissingMember(target)
        elif self.inAT and name == self.curAT.show():
            self.inAdmit = False
            # one last validation run
            self.curAT._bdp_in = [None] * len(self.curAT._bdp_in_map)
            self.curAT._bdp_out = [None] * len(self.curAT._bdp_out_map)
            self.curAT._baseDir = self.basedir
            at = copy.deepcopy(self.curAT)
            self.AT.append(at)
            self.flowmanager[at._taskid] = at
            self.curAT = None
            self.inAT = False
        elif name == bt.ADMIT:
            self.inAdmit = False
            if not self.dtd.checkAll():
                logging.info("Some required nodes missing from admit.xml file, attempting to continue anyway")
        elif name == "_keys":
            self.inKeys = False
        elif name == self.summaryEntryName:
            self.summaryData._metadata[self.metadataName].append(copy.deepcopy(self.summaryEntry))
            self.summaryEntryName = None
            self.inSummaryEntry = False
        elif name == self.summaryName:
            self.inSummary = False
        elif name == self.metadataName:
            self.metadataName = None
        # the node is finished: clear the per node state. ndarr and sets stay
        # lists because the container branch above iterates over them
        self.ndarr = []
        self.sets = []
        self.type = None
        self.name = None

    def setattr(self, target, name, value):
        """ Method to set attributes in a class

            Parameters
            ----------
            target : object
                The class on which the parameter is being set

            name : str
                The name of the attribute to set

            value : various
                The value to set the attribute to

            Returns
            -------
            None
        """
        # while inside the _keys node the data go through setkey
        if self.inKeys:
            target.setkey(name, value, True)
        else:
            setattr(target, name, value)

    def getattr(self, target, name):
        """ Method to get the value of a specific data member from the given
            class

            Parameters
            ----------
            target : object
                The class from which the data value will be obtained

            name : str
                The name of the data member whose value will be obtained

            Returns
            -------
            Various, the value of the requested data member, None if it does
            not exist
        """
        # while inside the _keys node the data come from getkey
        if self.inKeys:
            return target.getkey(name)
        if hasattr(target, name):
            return target.get(name)
        return None

    def characters(self, ch):
        """ Method called whenever characters are detected in an xml node.
            This method does some dtd validation. This method is only called
            by the SAX parser itself.

            Parameters
            ----------
            ch : unicode characters

            Returns
            -------
            None
        """
        char = str(ch).strip()
        # nothing to do for whitespace between nodes
        if not char:
            return
        # strings, containers and arrays are only collected here, they are
        # decoded and assigned when the node ends
        if isinstance(self.type, (list, dict, tuple, set, np.ndarray, str)):
            if self.inflow:
                self.flowdata += char
            else:
                self.tempdata += char
            return
        # determine which class the data are getting written to
        target = self._getTarget(useSummaryEntry=True, useSummary=True)
        if self.name == "_version":
            # check the version
            ver = self.getattr(target, self.name)
            vercheck = utils.compareversions(ver, str(char))
            if vercheck < 0:
                # newer read in
                logging.warning("Version mismatch for %s, data are a newer version than current software, attempting to continue." % target.getkey("_type"))
            elif vercheck > 0:
                # older read in
                logging.warning("Version mismatch for %s, data are an older version than current software, attempting to continue." % target.getkey("_type"))
        else:
            try:
                self.setattr(target, self.name, self.getData(char))
            except AttributeError:
                self._reportMissingMember(target)

    def close(self):
        """ Method to close out the reader, called only by the sax parser.
            Nothing needs to be cleaned up, so this does nothing.

            Parameters
            ----------
            None

            Returns
            -------
            None
        """
        return

    def getData(self, item):
        """ Method used to convert string to designated type.

            Parameters
            ----------
            item : str
                The string to convert

            Returns
            -------
            Various types, based on the expected type of the xml node; None
            if the type of the node is not one of bool, int, long, float or
            str
        """
        # bool has to be tested before int since bool is a subclass of int
        if isinstance(self.type, bool):
            return bool(int(item))
        if isinstance(self.type, int):
            return int(item)
        if isinstance(self.type, long):
            return long(item)
        if isinstance(self.type, float):
            return float(item)
        if isinstance(self.type, str):
            return str(item)