# Source code for admit.xmlio.dtdGenerator

#! /usr/bin/env casarun
#-*- mode: python -*-
""" .. _Dtd-Generator:

    dtdGenerator --- Generates the ADMIT DTD files.
    -----------------------------------------------

    This module defines the DTD generator for ADMIT. The DTDs are used to
    validate the XML I/O.

    In addition to generating the DTDs for each AT and BDP, the following
    files are also created:

    - bdp_types.py ---   constants and types definition file

    - __init__.py ---    in both admit/at and admit/bdp

    Usage:

    ::

      import dtdGenerator
      dtdGenerator.generate()

"""
# system imports
import sys
import os
import types
import shutil
import numpy as np

# ADMIT imports
import admit.util.bdp_types as bt
from admit.util import UtilBase
import admit.util.utils as utils

# Text fragments that are shared by every generated DTD file
START = "<!ELEMENT "
END = "(#PCDATA)>\n"
ATSTART = "<!ATTLIST "
ATEND = ") #REQUIRED>\n"
# comment block placed at the top of a DTD, explaining the occurrence markers
HEADER = (
    "<!-- The root node is BDP and has child nodes listed afterward,\n"
    "     a * indicates 0 or more instances\n"
    "     a ? indicates 0 or 1 instances\n"
    "     no added marker indicates 1 instance is expected\n"
    " -->\n"
)
# comment block describing the layout of the attribute declarations
MIDHEAD = (
    "<!-- attributes for the different nodes\n"
    "     format: node attribute (choices if like an enum)\n"
    "     CDATA indicates character data, followed by the default value\n"
    "-->\n"
)
# warning text embedded in the generated files
AUTOGEN = (
    "This file is automatically generated by dtdGenerator.py,\n"
    "  based on files in admit/bdp and admit/at. Edit this file at your own risk."
)
atlist = []

class DtdGenerator(object):
    """ Class to generate the dtd files for ADMIT. The dtd's are generated by searching
        for all BDP, AT, and utility class files (those located in admit/util and inherit
        from UtilBase). Each file is then loaded and introspected, and dtd's are generated
        by both the name and the type of each class attribute.

        Parameters
        ----------
        None

        Attributes
        ----------
        util_class : dict
            Dictionary containing the utility class types as keys and a class instance
            as the values.

        util_string : dict
            Dictionary containing the utility class types as keys and the class description
            for the dtd as the values.

        util_dtd : dict
            Dictionary containing the utility class types as keys and the dtd entries
            as the values.

    """
    def __init__(self):
        self.util_class = {}
        self.util_string = {}
        self.util_dtd = {}
        self.generate()

[docs]    def getType(self, i, val, key):
        """ Method to generate a string version of the data type of the input value.

            Parameters
            ----------
            val : various
                The value whose type is to be determined

            key : str
                The name of the variable that holds the data. Used only to give
                sensible error messages.

            Returns
            -------
            A string containing the type information
        """
        typ = ""
        if isinstance(val, dict):
            typ = val[i]
        else:
            typ = getattr(val, i)
        # if this is inherits from the UtilBase class then return the type string of the class
        if isinstance(typ, UtilBase):
            return typ._type
        elif isinstance(typ, list):
            return bt.LIST
        elif isinstance(typ, dict):
            return bt.DICT
        elif isinstance(typ, np.ndarray):
            return bt.NDARRAY
        elif isinstance(typ, set):
            return bt.SET
        # BOOL must come before INT as BOOL is a subclass of INT and will be
        # incorrectly identified if it doesn't come first
        elif isinstance(typ, bool):
            return bt.BOOL
        elif isinstance(typ, int):
            return bt.INT
        elif isinstance(typ, long):
            return bt.LONG
        elif isinstance(typ, float):
            return bt.FLOAT
        elif isinstance(typ, tuple):
            return bt.TUPLE
        elif type(typ) in types.StringTypes:
            return bt.STRING
        else:
            raise Exception("Improper type: " + str(typ) + " found for attribute " + i + " in " + key)


[docs]    def writeEntry(self, k, v, dtd):
        """ Generic method to write out the given class attribute to the dtd.

            Parameters
            ----------
            k : str
                The keyword that is being written out.

            v : str
                The data type of the keyword (e.g. bt.FLOAT).

            dtd : file handle
                The handle to the file where the dtd is being written.
        """
        # if writing out a utility class then iterate through all of the items in the class
        if v in bt.UTIL_LIST:
            # multiimages are a special case
            if v.upper() == bt.MULTIIMAGE:
                dtd.write(START + k + "\t(images)>\n")
                dtd.write(ATSTART + k + " type (" + bt.MULTIIMAGE + ATEND)
                # listing of all data memebers and their type, any changes made to the
                # MultiImage base class must be reflected here
                listing = {"images"     : bt.DICT}
                # write out each of the data members
                for k, v in listing.iteritems():
                    self.writeEntry(k, v, dtd)
                self.writeEntry(bt.IMG, "Image", dtd)
            else:
                dtd.write(START + k + self.util_string[v] + "\n")
                dtd.write(ATSTART + k + " type (%s) #REQUIRED>\n" % (v.upper()))
                dtd.write(self.util_dtd[v])
        else:
            dtd.write(START + k + "\t\t" + END)
            if v == bt.LIST or v == bt.DICT or v == bt.TUPLE:
                dtd.write(ATSTART + k + " ndarray (" + bt.STRING + ") #REQUIRED>\n")
                dtd.write(ATSTART + k + " set (" + bt.STRING + ") #REQUIRED>\n")
            dtd.write(ATSTART + k + " type (%s) #REQUIRED>\n" % (v))


[docs]    def write_bdp(self, key, val):
        """ Method which takes a class name and BDP sub-class instance. It creates
            a dtd file for the class and collects relevant info for the file.

            Parameters
            ----------
            key : str
                the name of the class

            value : BDP
                an instance of the BDP sub-class

            Returns
            -------
            None
        """
        sys.stdout.write("Processing %s ..." % (key))
        sys.stdout.flush()
        # open the dtd file, we don't append, just write it from scratch
        sys.path.append(os.path.dirname(os.path.realpath(__file__)) + os.sep + "..")

        dtd = open(os.path.dirname(os.path.realpath(__file__)) + os.sep + "dtd" + os.sep + key + ".dtd", 'w')
        dtd.write("<!-- " + AUTOGEN + "\n-->\n")
        dtd.write(HEADER)

        listing = {}
        # write out the individual data elements based on their occurrence
        line = START + bt.BDP + "\t("
        # loop over all data nodes adding them to the appropriate list
        for i in val.__dict__:
            listing[i] = self.getType(i, val, key)
            line += i + ","
        line = line[:-1]
        line += ")>\n"

        dtd.write(line)
        dtd.write(ATSTART + "BDP type (" + key + ") #REQUIRED>\n")

        dtd.write("<!-- child nodes #PCDATA indicates parsable character data-->\n")
        for k, v in listing.iteritems():
            self.writeEntry(k, v, dtd)

        dtd.close()
        sys.stdout.write("Done\n")
        sys.stdout.flush()


[docs]    def write_at(self, key, val):
        """ Method which takes a class name and AT sub-class instance. It creates
            a dtd file for the class and collects relevant info for the file.

            Parameters
            ----------
            key : str
                the name of the class

            value : AT
                an instance of the AT sub-class

            Returns
            -------
            None
        """
        sys.stdout.write("Processing %s ..." % (key))
        sys.stdout.flush()
        # open the dtd file, we don't append, just write it from scratch
        sys.path.append(os.path.dirname(os.path.realpath(__file__)) + os.sep + "..")

        dtd = open(os.path.dirname(os.path.realpath(__file__)) + os.sep + "dtd" + os.sep + key + ".dtd", 'w')

        listing = {}
        # write out the individual data elements based on their occurrence
        line = START + val._type.upper() + "\t("
        # loop over all data nodes adding them to the appropriate list
        # skipping those that are derived or reconstructed
        for i in val.__dict__:
            if i == "_bdp_in" or i == "_bdp_out" or i == "_valid_bdp_in" or \
               i == "_valid_bdp_out" or i == "_keys" or i == "_type" or \
               i == "_bdp_out_length" or i == "_bdp_in_length" or \
               i == "_bdp_out_zero" or i == "_bdp_out_order_list" or \
               i == "_bdp_in_zero" or i == "_bdp_in_order_list" or \
               i == "_bdp_in_order_type":
                continue
            listing[i] = self.getType(i, val, key)
            line += i + ","
        line += "_keys"
        line += ")>\n"
        dtd.write(line)
        # process images, tables, tasks, and everything else adding their attributes
        dtd.write("<!-- child nodes #PCDATA indicates parsable character data-->\n")

        for k, v in listing.iteritems():
            self.writeEntry(k, v, dtd)

        listing = {}
        # write out the individual data elements based on their occurrence
        line = START + "_keys" + "\t("
        # loop over all data nodes adding them to the appropriate list
        for i in val._keys:
            listing[i] = self.getType(i, val._keys, key)
            line += i + ","
        line = line[:-1]
        line += ")>\n"
        line += ATSTART + "_keys type (" + bt.DICT + ATEND
        dtd.write(line)
        for k, v in listing.iteritems():
            self.writeEntry(k, v, dtd)
        dtd.close()

        sys.stdout.write("Done\n")
        sys.stdout.flush()


[docs]    def generate(self):
        """ Main method for generating the dtd. Searches through the BDP and AT
            directories and generates dtds for each file found.

            Parameters
            ----------
            None

            Returns
            -------
            None
        """
        # get a list of the BDPs
        bdp_files = []
        for file in os.listdir(os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "bdp" + os.sep):
            if file.endswith("_BDP.py") and not (file.startswith(".") or file.startswith("#")):
                bdp_files.append(file.replace(".py", ""))

        # get a list of the ATs
        at_files = []
        for file in os.listdir(os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "at" + os.sep):
            if file.endswith("_AT.py") and not (file.startswith(".") or file.startswith("#")):
                at_files.append(file.replace(".py", ""))

        # get list of utility classes
        util_files = []
        for file in os.listdir(os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "util" + os.sep):
            if file.endswith(".py") and not (file.startswith(".") or file.startswith("#") or file == "__init__.py"):
                fl = open(os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "util" + os.sep + file)
                lines = fl.readlines()
                for l in lines:
                    if "import" in l and "UtilBase" in l:
                        util_files.append(file.replace(".py", ""))
                        break

        bdp_files.sort()
        at_files.sort()
        util_files.sort()

        # create the bdp_types.py file
        types = open(os.sep + "tmp" + os.sep + str(os.getpid()) + ".bdp_types.py", 'w')
        typesfl = os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "util" + os.sep + "bdp_types.py"
        bdp_init = os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "bdp" + os.sep + "__init__.py"
        bdp_f = open(os.sep + "tmp" + os.sep + str(os.getpid()) + ".bdp__init__.py", 'w')
        at_init = os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep + "at" + os.sep + "__init__.py"
        at_f = open(os.sep + "tmp" + os.sep + str(os.getpid()) + ".at__init__.py", 'w')
        at_f.write("""\"\"\"AT Package\n   ----------\n\n   This package contains specific implementations of ADMIT Tasks, derived\n   from the AT base class.\n\"\"\"\n# This file is automatically generated, do not edit.\n# Standard, streamlined AT names.\n""")
        bdp_f.write("""\"\"\"BDP Package\n   -----------\n\n   This package contains specific implementations of ADMIT Basic Data Products, \n   derived from the BDP base class.\n\"\"\"\n# This file is automatically generated, do not edit.\n\n# Standard, streamlined BDP names.\n""")
        types.write("\"\"\"\n" + AUTOGEN + "\n\nThis file contains string constants useful for programming\n\"\"\"\n")
        sys.stdout.write("Writing types.py ...")
        sys.stdout.flush()
        # these are really just constant strings that will make life easier for programming
        # generic data types
        types.write("\n#Generics\n\n")
        types.write("GENERIC = \"Generic\"\n")
        types.write("ADMIT = \"ADMIT\"\n")
        types.write("AT = \"AT\"\n")
        types.write("BDP = \"BDP\"\n")
        types.write("INT = \"INT\"\n")
        types.write("FLOWMANAGER = \"flowmanager\"\n")
        types.write("FLOAT = \"FLOAT\"\n")
        types.write("DICT = \"DICT\"\n")
        types.write("LIST = \"LIST\"\n")
        types.write("BOOL = \"BOOL\"\n")
        types.write("LONG = \"LONG\"\n")
        types.write("STRING = \"STRING\"\n")
        types.write("NDARRAY = \"NDARRAY\"\n")
        types.write("TUPLE = \"TUPLE\"\n")
        types.write("NONE = \"NONE\"\n")
        types.write("SET = \"SET\"\n")

        types.write("SUMMARY = \"SUMMARY\"\n")
        types.write("SUMMARYENTRY = \"SUMMARYENTRY\"\n")
        types.write("METADATA = \"METADATA\"\n")
        types.write("REQUIRED = \"REQ\"\n")
        types.write("OPTIONAL = \"OPT\"\n")
        types.write("FOUND = \"FOUND\"\n")
        types.write("MISSING = \"MISSING\"\n")
        types.write("IMG = \"IMG\"\n")

        # Utility classes
        # generate the dtd strings and headers for the utility classes
        types.write("\n# Utility classes\n\n")
        for ut in util_files:
            types.write(ut.upper() + " = \"" + ut.upper() + "\"\n")
            self.util_class[ut] = utils.getClass("util", ut)
            items = {}
            for i in self.util_class[ut].__dict__:
                items[i] = self.getType(i, self.util_class[ut], ut)
            self.util_string[ut] = "\t("
            self.util_dtd[ut] = ""
            for k, v in items.iteritems():
                if k == "_type" or k == "_order":
                    continue
                self.util_string[ut] += k + ","
                self.util_dtd[ut] += START + k + "\t\t" + END
                if v == bt.LIST or v == bt.DICT or v == bt.TUPLE:
                    self.util_dtd[ut] += (ATSTART + k + " ndarray (" + bt.STRING + ") #REQUIRED>\n")
                    self.util_dtd[ut] += (ATSTART + k + " set (" + bt.STRING + ") #REQUIRED>\n")
                self.util_dtd[ut] += (ATSTART + k + " type (%s) #REQUIRED>\n" % (v))
            self.util_string[ut] = self.util_string[ut][:-1] + ")>"
        types.write("UTIL_LIST = %s\n" % (str(self.util_class.keys())))


        # image data types
        types.write("\n# Image types\n\n")
        types.write("DATA = \"DATA\"\n")
        types.write("THUMB = THUMBNAIL = \"THUMBNAIL\"\n")
        types.write("AUX = AUXILIARY = \"AUXILIARY\"\n")
        types.write("imagedescriptor_types = [DATA, THUMB, AUX]\n")
        valid_image_types = ["FITS", "CASA", "MIRIAD", "GIF", "PNG", "JPG", "PDF", "PS", "SVG"]
        type_string = "image_types = ["
        for i in valid_image_types:
            types.write("%s = \"%s\"\n" % (i, i))
            type_string += i + ", "
        type_string = type_string[:-2] + "]"
        types.write("\n" + type_string + "\n")

        # write out the BDP types
        types.write("# BDP classes\n\n")
        for key in bdp_files:
            types.write(key.upper() + " = \"" + key + "\"\n")

        # write out the AT types
        types.write("\n# AT classes\n\n")
        for key in at_files:
            types.write(key.upper() + " = \"" + key + "\"\n")

        # colors for pretty printing
        types.write("\n\nclass color:\n")
        types.write("    PURPLE = ''\n")  # '\\033[95m'\n")
        types.write("    CYAN = ''\n")  # '\\033[96m'\n")
        types.write("    DARKCYAN = ''\n")  # '\\033[36m'\n")
        types.write("    BLUE = ''\n")  # '\\033[94m'\n")
        types.write("    GREEN = ''\n")  # '\\033[92m'\n")
        types.write("    YELLOW = ''\n")  # '\\033[93m'\n")
        types.write("    RED = ''\n")  # '\\033[91m'\n")
        types.write("    END = ''\n\n\n")  # '\\033[0m'\n\n")
        types.write("class format:\n")
        types.write("    BOLD = ''\n")  # '\\033[1m'\n")
        types.write("    UNDERLINE = ''\n")  # '\\033[4m'\n")
        types.write("    END = ''\n")  # '\\033[0m'\n")

        types.close()

        try:
            os.remove(typesfl + "c")
        except:
            pass
        shutil.move(os.sep + "tmp" + os.sep + str(os.getpid()) + ".bdp_types.py", typesfl)
        # load in the new bdp_types file
        reload(bt)
        bdp_f.write("from BDP" + " " * 24 + "import BDP" + " " * 24 + "as BDP\n")

        # generate the dtds for the BDPs
        for key in bdp_files:
            #line = "from " + key + " " * (27 - len(key)) + "import " + key + " " * (27 - len(key)) + "as " + key[:-4] + "\n"
            line = "from " + key + " " * (27 - len(key)) + "import " + key + " " * (27 - len(key)) + "as " + key + "\n"
            bdp_f.write(line)

        bdp_f.close()
        shutil.move(os.sep + "tmp" + os.sep + str(os.getpid()) + ".bdp__init__.py", bdp_init)
        for key in bdp_files:
            self.write_bdp(key, utils.getClass("bdp", key))

        # generate the dtds for the ATs
        ATTYPES = ""
        for key in at_files:
            #line = "from " + key + " " * (27 - len(key)) + "import " + key + " " * (27 - len(key)) + "as " + key[:-3] + "\n"
            line = "from " + key + " " * (27 - len(key)) + "import " + key + " " * (27 - len(key)) + "as " + key + "\n"
            at_f.write(line)
            ATTYPES += key.upper() + ","

        ATTYPES = ATTYPES[:-1]

        at_f.close()
        shutil.move(os.sep + "tmp" + os.sep + str(os.getpid()) + ".at__init__.py", at_init)
        for key in at_files:
            self.write_at(key, utils.getClass("at", key))

        # make the dtd for the ADMIT object
        admit = open(os.path.dirname(os.path.realpath(__file__)) + os.sep + "dtd" + os.sep + "admit.dtd", 'w')
        admit.write("<!-- " + AUTOGEN + "\n-->\n")
        admit.write(HEADER)
        admit.write(START + "ADMIT (userData,summaryData,project_id,name,pmode,ptype,flowmanager,projmanager,loglevel,astale,_loggername," + ATTYPES + ")>\n")
        admit.write(START + "userData " + END)
        admit.write(ATSTART + "userData type (DICT" + ATEND)
        admit.write(ATSTART + "userData ndarray (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(ATSTART + "userData set (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(START + "summaryData (metadata,_datatype,_description)>\n")
        admit.write(ATSTART + "summaryData type (SUMMARY" + ATEND)
        admit.write(START + "metadata (summaryEntry)>\n")
        admit.write(ATSTART + "metadata type (METADATA" + ATEND)
        admit.write(ATSTART + "metadata name (STRING" + ATEND)
        admit.write(START + "summaryEntry (_value,_taskname,_taskid,_taskargs)>\n")
        admit.write(ATSTART + "summaryEntry type (SUMMARYENTRY" + ATEND)
        admit.write(START + "_value " + END)
        admit.write(ATSTART + "_value type (" + bt.LIST + ATEND)
        admit.write(ATSTART + "_value ndarray (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(ATSTART + "_value set (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(START + "_taskname " + END)
        admit.write(ATSTART + "_taskname type (" + bt.STRING + ATEND)
        admit.write(START + "_taskid " + END)
        admit.write(ATSTART + "_taskid type (" + bt.INT + ATEND)
        admit.write(START + "_taskargs " + END)
        admit.write(ATSTART + "_taskargs type (" + bt.STRING + ATEND)
        admit.write(START + "_datatype " + END)
        admit.write(ATSTART + "_datatype type (" + bt.DICT + ATEND)
        admit.write(ATSTART + "_datatype ndarray (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(ATSTART + "_datatype set (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(START + "_description " + END)
        admit.write(ATSTART + "_description type (" + bt.DICT + ATEND)
        admit.write(ATSTART + "_description ndarray (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(ATSTART + "_description set (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(START + "project_id " + END)
        admit.write(ATSTART + "project_id type (INT" + ATEND)
        admit.write(START + "name " + END)
        admit.write(ATSTART + "name type (STRING" + ATEND)
        admit.write(START + "pmode " + END)
        admit.write(ATSTART + "pmode type (INT" + ATEND)
        admit.write(START + "ptype " + END)
        admit.write(ATSTART + "ptype type (INT" + ATEND)
        admit.write(START + "flowmanager " + END)
        admit.write(ATSTART + "flowmanager type (DICT" + ATEND)
        admit.write(ATSTART + "flowmanager ndarray (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(ATSTART + "flowmanager set (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(START + "projmanager " + END)
        admit.write(ATSTART + "projmanager type (DICT" + ATEND)
        admit.write(ATSTART + "projmanager ndarray (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(ATSTART + "projmanager set (" + bt.STRING + ") #REQUIRED>\n")
        admit.write(START + "loglevel " + END)
        admit.write(ATSTART + "loglevel type (INT" + ATEND)
        admit.write(START + "astale " + END)
        admit.write(ATSTART + "astale type (INT" + ATEND)
        admit.write(START + "_loggername " + END)
        admit.write(ATSTART + "_loggername type (STRING" + ATEND)

        admit.close()

        sys.stdout.write("Done\n")
        sys.stdout.flush()

def generate():
    """ Convenience function which generates all of the files by creating
        a DtdGenerator; its constructor does the work and the instance is
        then discarded.

        Parameters
        ----------
        None

        Returns
        -------
        None
    """
    DtdGenerator()