Source code for admit.xmlio.DTDParser

""" .. _DTDParser:

    DTDParser --- Parses DTD information within an XML file.
    --------------------------------------------------------

    This module defines the DTDParser class.
"""

from admit.util.AdmitLogging import AdmitLogging as logging
import pprint
# Module-level pretty-printer configured with a 4-space indent; presumably kept
# as a debugging aid for dumping nested structures.
pp = pprint.PrettyPrinter(indent=4)

class DTDParser(object):
    """ Class for parsing a dtd and holding the data for validation with XML.

        Reads in the dtd information at the top of the given file and
        constructs a dictionary based on its contents. Validation is done
        against this dictionary. A customized DTD validator was needed as
        there is no dtd validator for the SAX parser in python at the time
        this code was written.

        Parameters
        ----------
        xmlFile : str
            The xml file to read the dtd from.
            Default: None.

        Attributes
        ----------
        xmlFile : str
            The xml file to read the dtd from.

        entities : dict
            Memory model of the dtd. Each key is an element name and each
            value has the form
            ``{"found": bool, "attrib": {attr: {"found": bool, "values": list}}}``.

        at : str
            Name suffix ("_" followed by the element name) recorded when an
            element whose name ends in "_AT" is parsed; it is appended to the
            names of the elements and attribute lists that follow. None until
            such an element has been seen.
    """

    def __init__(self, xmlFile=None):
        self.xmlFile = xmlFile
        self.entities = dict()
        self.at = None

    def checkAll(self):
        """ Method to check the dtd structure to see if all expected nodes
            were found.

            The first node or attribute that has not been marked as found is
            logged (together with the file name) and ends the search.

            Parameters
            ----------
            None

            Returns
            -------
            Boolean, whether or not all nodes were found
        """
        for name, entry in self.entities.items():
            if not entry["found"]:
                logging.info("%s not found (%s)" % (name, self.xmlFile))
                return False
            for attrib, spec in entry["attrib"].items():
                if not spec["found"]:
                    logging.info("%s %s not found (%s)"
                                 % (name, attrib, self.xmlFile))
                    return False
        return True

    def parse(self, xmlFile=None):
        """ Method to parse the xml file for dtd information

            Only lines containing ``<!ELEMENT`` or ``<!ATTLIST`` are
            interpreted; reading stops at the first other line that contains
            ``]>``. Tokens are obtained by splitting each line on whitespace.

            Parameters
            ----------
            xmlFile : str
                The name of the xml file to search, does not need to be
                specified if the file was given in the constructor. A file
                name already stored on the instance takes precedence over
                this argument.
                Default : None

            Returns
            -------
            None

            Raises
            ------
            Exception
                If no file name is available from either the constructor or
                this call.
        """
        if self.xmlFile is None:
            if xmlFile is None:
                raise Exception("No xml file to parse")
            self.xmlFile = xmlFile

        # The context manager closes the file even if a malformed line raises,
        # and iterating lazily avoids reading past the end of the dtd.
        with open(self.xmlFile, 'r') as dtdFile:
            for line in dtdFile:
                if "<!ELEMENT" in line:
                    tokens = line.split()
                    name = tokens[1]
                    if name.endswith("_AT"):
                        # the "_AT" element keeps its own name and becomes
                        # the suffix for everything declared after it
                        self.at = "_" + name
                    elif self.at is not None:
                        name += self.at
                    self.entities[name] = {"found": False, "attrib": {}}
                elif "<!ATTLIST" in line:
                    tokens = line.split()
                    # "(a|b|c)" -> ["a", "b", "c"]
                    values = tokens[3].replace(")", "").replace("(", "")
                    values = values.split("|")
                    name = tokens[1]
                    if self.at is not None:
                        name += self.at
                    self.entities[name]["attrib"][tokens[2]] = {
                        "found": False,
                        "values": values,
                    }
                elif "]>" in line:
                    break

    def check(self, name, attrib=None, value=None):
        """ Method to check a node for validity. Validity includes correct
            name and data type.

            A node that is not part of the dtd, or an attribute that is not
            listed for the node, is only logged. A known node is marked as
            found; a known attribute is marked as found when its value is
            permitted.

            Parameters
            ----------
            name : str
                The name of the node being checked

            attrib : str
                The attribute of the node being checked, if any.
                Default: None

            value : str
                The type of the attribute being checked (e.g. bt.INT)
                Default: None

            Returns
            -------
            None

            Raises
            ------
            Exception
                If the attribute is listed in the dtd but `value` is not one
                of its permitted values and "ANY" is not permitted either.
        """
        try:
            entry = self.entities[name]
        except KeyError:
            logging.info("Data member %s is not a member of the dtd, xml inconsistent with definition (%s)"
                         % (name, self.xmlFile))
            return

        # note that the node has been found
        entry["found"] = True
        if attrib is None:
            return

        try:
            spec = entry["attrib"][attrib]
        except KeyError:
            # an unexpected attribute is tolerated, only a note is logged
            logging.info("Attribute %s for %s not listed in DTD, malformed xml detected (%s)"
                         % (attrib, name, self.xmlFile))
            logging.info("Inconsistency between dtd and xml detected, continuing")
            return
        self._validateValue("check", name, attrib, value, spec)

    def checkAttribute(self, name, attrib, value=None):
        """ Method to check an attribute for validity. Validity includes
            correct name and data type.

            Unlike `check`, the node itself is not marked as found. An
            unknown node or attribute is only logged.

            Parameters
            ----------
            name : str
                The name of the node being checked

            attrib : str
                The attribute of the node being checked.

            value : str
                The type of the attribute being checked (e.g. bt.INT)
                Default: None

            Returns
            -------
            None

            Raises
            ------
            Exception
                If the attribute is listed in the dtd but `value` is not one
                of its permitted values and "ANY" is not permitted either.
        """
        try:
            spec = self.entities[name]["attrib"][attrib]
        except KeyError:
            logging.info("Attribute %s not listed in DTD, malformed xml detected (%s)"
                         % (attrib, self.xmlFile))
            logging.info("Inconsistency between dtd and xml detected, continuing")
            return
        self._validateValue("checkAttribute", name, attrib, value, spec)

    def _validateValue(self, caller, name, attrib, value, spec):
        """ Mark an attribute as found if `value` is permitted for it,
            otherwise log the problem and raise an Exception.

            Parameters
            ----------
            caller : str
                Name of the public method doing the check, used as the
                prefix of the error message.

            name : str
                The name of the node owning the attribute.

            attrib : str
                The name of the attribute.

            value : str
                The value to validate.

            spec : dict
                The ``{"found": ..., "values": ...}`` entry of the attribute.
        """
        allowed = spec["values"]
        if value not in allowed and "ANY" not in allowed:
            message = ("DTDParser.%s: Value %s for attribute %s of node %s is not a valid entry (file %s)"
                       % (caller, value, attrib, name, self.xmlFile))
            logging.info(message)
            raise Exception(message)
        spec["found"] = True