Source code for admit.util.Table

""" .. _Table-api:

    **Table** --- Raw tabular data base.
    ------------------------------------

    This module defines the Table class for TABLE entries in BDPs.
"""

#system imports
import numpy as np
import xml.etree.cElementTree as et
import copy
import textwrap
import ast

# ADMIT imports
from UtilBase import UtilBase
import bdp_types as bt


[docs]class Table(UtilBase): """ Defines the basic table structure used in ADMIT. The Table class is a container for holding data in tabular format. The table can hold data in column-row format and also in plane-column-row format. Data can be added in instantiation, by columns, by rows, or by entire planes. Parameters ---------- keyval : dict Any valid attributes can be specified to the constructor. Attributes ---------- columns : List containing the column headers, optional (defaults to an empty list) units : List containing the units of each column, optional (defaults to an empty list) planes : List containing labels for each plane, optional (defaults to an empty list) data : A numpy array containing the data, can be 1D, 2D, or 3D, optional (defaults to an empty array) description : A string for a description/caption of the table, optional (defaults to an empty string) """ def __init__(self, **keyval): """Constructor """ self.columns = [] # column labels self.units = [] # units of columns self.planes = [] # label for planes self.data = np.array([]) # 1d, 2d, 3d self.description = "" UtilBase.__init__(self, **keyval) def __str__(self): print bt.format.BOLD + bt.color.GREEN + "Table :" + bt.format.END if len(self.data.shape) < 3: self.exportTable("/dev/stdout") else: for plane in range(self.data.shape[2]): print bt.format.BOLD + "Plane : " + str(plane) + bt.format.END self.exportTable("/dev/stdout", plane=plane) print "\n" return "\n" def _jsondict(self,prefix=False): """prepare as json formatted. note simply using json.dump() will not work because numpy arrays are not JSON serializable. Furthermore json.dump(self.data.tolist()) is not useful because it spits out a bare list, when what we need is a dict. 2D TABLES ONLY! """ # have to replace ' with " for javascript to interpret # kooky two-step process needed # http://stackoverflow.com/questions/13409559/ s = str(self.columns) colstr = s.replace("\\'", 'REPLACEME').replace("'", '"').replace('REPLACEME', "\\'") s = str(self.units) unitsstr = s.replace("\\'", 'REPLACEME').replace("'", '"').replace('REPLACEME', "\\'") if prefix: outstr = '"linetable":'+ '{\n"columns":' + colstr +',\n"units":'+unitsstr+',\n' else: outstr = '{\n"columns":' + colstr +',\n"units":'+unitsstr+',\n' datastr = '"lines":[' datalist = self.data.tolist() j =0 for line in datalist: linestr = '{' for i in range(len(line)): linestr = linestr+'"'+self.columns[i]+'":"'+line[i]+'"' if i != len(line)-1: linestr = linestr + ',' linestr = linestr + '} ' if j != len(datalist)-1: linestr = linestr + ',\n' j+=1 datastr = datastr+linestr outstr = outstr+datastr+']\n}\n' return outstr
[docs] def html(self,css=None) : """Create an HTML representation of the table Parameters ---------- css : str A string that may refer to a CSS or style parameter. This can be used for special formatting of the table, e.g. striping. Default: No extra formatting. Returns ------- string HTML <table> representation. """ if css: tablestr = '<h3>%s</h3>\n<table %s><thead><tr>' % (self.description,css) else: tablestr = '<h3>%s</h3>\n<table><thead><tr>' % self.description for h in self.columns: tablestr = tablestr + '<th>%s</th>' % h tablestr = tablestr + '</tr>\n' for u in self.units: tablestr = tablestr + '<th>[%s]</th>' % u tablestr = tablestr + '</tr></thead>\n<tbody>' np.set_string_function(None) np.set_printoptions( threshold = None, nanstr = 'NaN', infstr = 'Inf', formatter={'float' : '<td>{:.3E}</td>'.format, #'str_kind' : '<td>{}</td>'.format 'str_kind' : lambda x: self._formatcell(x) }) for row in self.data: #strip beginning and ending [,] from string. rowstr = str(row)[1:-1] tablestr = tablestr + '<tr>'+rowstr+'</tr>\n' tablestr = tablestr + '</tbody></table>\n' np.set_printoptions(formatter=None) return tablestr
def _formatcell(self,str_value): """Format a cell for an HTML table. This method is needed to deal with numpy array's unfortunate feature of converting all numbers to strings in an array if any cell in the array is string. Parameters: ---------- str_value: str The string from a cell of a numpy array. Returns ------- HTML-formatted cell: <td>value</td>. If the input value can be converted to a float, return it in scientific notation %.3E, otherwise return the string """ try: # if float conversion works, then the string is really a # number and we can use scientific notation q = float(str_value) return '<td>{:.3E}</td>'.format(q) except: return '<td>{}</td>'.format(str_value)
[docs] def exportTable(self, fileName, plane=0, cols=[], fixcols=[]): """ Method to export a table to ascii text Note only one plane at the time can be written. Alternatively one could allow multiple planes, and use another keyword for the fixcols=[], i.e. the columns that do not change per plane. This has not been implemented. Parameters ---------- fileName : str The name of the file to write the table to plane : int or list, optional What plane to export, defaults to the first plane (0). A list can be given if multiple planes need to be written. cols : if given, a subset of these named columns will be written Columns are written in the order they were listed originally, not the order in this given list. For multi-plane tables, these should be the columns that vary. fixcols : if given, these are the named columns that are common for all planes. Returns ------- None """ # @todo implement multiple planes using a fixcols= # @todo csv=True/False option? # # if cols was not given, use all columns from the table for output if len(cols) == 0: cols = self.columns f = open(fileName, 'w') # Determine if the data are 2D or 3D and act appropriately if len(self.data.shape) == 2: # header first line = "#|" for i in cols : line += i + "\t|" f.write(line + "\n") x, y = self.data.shape for i in range(0, x) : line = "" for j in range(0, y) : if self.columns[j] not in cols: continue line += str(self.data[i][j]) + "\t" f.write(line + "\n") elif len(self.data.shape) == 3 and type(plane) == type([]): line = "#|" for i in cols: line += i + "\t|" f.write(line + "\n") x, y, z = self.data.shape if(plane > z) : raise Exception("Requested plane does not exist in table") for i in range(0, x) : line = "" for j in range(0, y) : if self.columns[j] not in cols: continue line += str(self.data[i][j][plane]) + "\t" f.write(line + "\n") elif len(self.data.shape) == 3: line = "#|" for i in cols: line += i + "\t|" f.write(line + "\n") x, y, z = self.data.shape if(plane > z) : raise Exception("Requested plane does not exist in table") for i in range(0, x) : line = "" for j in range(0, y) : if self.columns[j] not in cols: continue line += str(self.data[i][j][plane]) + "\t" f.write(line + "\n") else: print "Cannot write out this table at this time. len(data.shape) = %d" % len(self.data.shape) f.close()
# these methods allow for the addition of columns, rows, and planes to existing tables
[docs] def addColumn(self, data, col=""): """ Add a column to the table Adds the given column to the table and recomputes the minimum and maximum. The column must have the same length as the other columns or numpy will throw an exception. Parameters ---------- data : list or numpy array The data of the column to be added col : str, optional The name of the column Returns ------- None """ # add the data to the array, converting as necessary if isinstance(data, list): data = np.array([data]) self.data = np.concatenate((self.data, data.T), axis=1) # add the column name to the list self.columns.append(col)
[docs] def addRow(self, row): """ Add a row to the table Adds the given row to the table and recomputes the minimum and maximum. Parameters ---------- row : list or numpy array The data of the row to be added Returns ------- None """ # add the row, converting as necessary if isinstance(row, list): row = np.array([row], dtype=object) #print row.shape #print self.data.shape if len(self.data.shape) == 1 and self.data.shape[0] == 0: self.data = copy.deepcopy(row) else: self.data = np.concatenate((self.data, row), axis=0)
#print self.data.shape, "XX"
[docs] def addPlane(self, data, plane=""): """ Add a plane to the table Adds the given plane to the table and recomputes the minimum and maximum. The plane must have the same dimensions as the other planes or numpy will throw an exception. Parameters ---------- data : list or numpy array The data of the plane to be added plane : str, optional The name of the plane. See getPlane() how to access a plane Returns ------- None """ # add the plane, converting as necessary if isinstance(data, list): data = np.array([data]) sh = self.data.shape if len(sh) == 1: if sh[0] != 0: raise Exception("Data in this table are only 1D, you cannot add a plane. Try using addRow.") else: self.data = data elif len(sh) == 2: self.data = np.dstack([self.data, data]) else: if len(data.shape) == 2: self.data = np.concatenate((self.data, np.expand_dims(data, axis=2)), axis=2) else: self.data = np.concatenate((self.data, data), axis=2) # add the name to the planes self.planes.append(plane)
[docs] def setData(self, data): """ Set the data of the table all at once Parameters ---------- data : list or numpy array The actual data to insert into the table Returns ------- None """ # set the data, converting as necessary if isinstance(data, list) : data = np.array(data) self.data = data
[docs] def getColumnByName(self, name, plane=0, typ=None): """ Get a column by its name Parameters ---------- name : str The name of the column to retrieve plane : int The plane to retrieve it from if the table is 3D typ : various The data type to convert the returning data to. Default: None (no conversion) Returns ------- The data from the column as a numpy array, or None if the column name does not exist. """ if len(self.data.shape) == 3: try: i = self.columns.index(name) temp = self.data.T[plane] if typ is None: return temp[i] else: return temp[i].astype(typ) except: return None try : i = self.columns.index(name) if typ is None: return self.data.T[i] else: return self.data.T[i].astype(typ) except : return None
[docs] def getFullColumnByName(self, name, typ=None): """ Method to get a full column (single column from all planes) by name Parameters ---------- name : str The name of the column to get typ : various The data type to convert the returning data to. Default: None (no conversion) Returns ------- Numpy array containing the data of the full column. """ try: temp = np.array([self.getColumnByName(name, 0)]) if len(self.data.shape) < 3: if len(temp.shape) == 2: if temp.shape[0] == 1 and temp.shape[1] == 1: if typ is None: return np.reshape(temp, -1) else : return np.reshape(temp, -1).astype(typ) if typ is None: return np.squeeze(temp) else: return np.squeeze(temp).astype(typ) for i in range(1, self.data.shape[2]): col = np.array([self.getColumnByName(name, i)]) temp = np.concatenate((temp, col), axis=0) if typ is None: return temp else: return temp.astype(typ) except: raise
[docs] def getColumn(self, col, plane=0, typ=None): """ Get a single column by its index Parameters ---------- col : int The index of the column to retrieve plane : int The plane to retrieve it from if the table is 3D typ : various The data type to convert the data to. Default: None Returns ------- The data from the column as a numpy array, or None if the column index does not exist. """ if len(self.data.shape) == 3: try: temp = self.data.T[plane] if typ is None: return temp[col] else: return temp[col].astype(typ) except: return None try: if typ is None: return self.data.T[col] else: return self.data.T[col].astype(typ) except: return None
[docs] def getHeader(self): """ Get the header information Parameters ---------- None Returns ------- list of the column headers """ return self.columns
[docs] def getUnits(self): """ Get the units information Parameters ---------- None Returns ------- list of the units headers """ return self.units
[docs] def getRow(self, row): """ Get data from a specific row Parameters ---------- row : int The row to get Returns ------- a numpy array of the data of the specified row, or None if the row does not exist """ try: return self.data[row] except: return None
[docs] def getRowAsDict(self, row): """ Get data from a specific row in dictionary format with the column headings as keys. If there are no column headings or two column headings are identical or column header has an empty string an exception is thrown. Note: this is not terribly useful for multi-plane Tables. Parameters ---------- row : int The row to get Returns ------- A dictionary of the data of the specified row, or None if the row does not exist """ # check for empty column header list if not self.columns: errmsg = "Can't make a dictionary -- Table has no column headers defined" raise Exception, errmsg # check for duplicates if len(self.columns) != len(set(self.columns)): errmsg = "Can't make a dictionary -- Table has duplicate column headers" raise Exception, errmsg try: x = dict() for i in range(len(self.data[row])): #check first for empty column header if not self.columns[i]: errmsg = "Can't make a dictionary -- Table has empty column header %d" % i raise Exception, errmsg x[self.columns[i]] = self.data[row][i] return x except IndexError: return None
[docs] def clear(self, full=False): """ Method to clear out the data from the table Parameters ---------- full : bool If True then clear header and other meta data also Default: False Returns ------- None """ del self.data self.data = np.array([]) # 1d, 2d, 3d self.planes = [] # label for planes if full: self.columns = [] # column labels self.units = [] # units of columns self.description = ""
[docs] def getPlane(self, pln): """ Method to get a single plane from the table Parameters ---------- pln : int The plane number to get (0 based) No Default. Returns ------- Numpy array containing the requested plane """ if len(self.shape()) < 3: return copy.deepcopy(self.data) return copy.deepcopy(self.data.T[pln].T)
[docs] def next(self): """ Method to get the next row from a table Parameters ---------- None Returns ------- List of the next row, None if there are no more. """ if not hasattr(self, "lastrow"): self.lastrow = 0 return self.getRow(0) self.lastrow += 1 return self.getRow(self.lastrow)
[docs] def rewind(self): """ Reset the "next" counter to the beginning Parameters ---------- None Returns ------- None """ del self.lastrow
[docs] def shape(self): return self.data.shape
[docs] def serialize(self): """Create a string representation of the table that can be converted to native Python structures with ast.literal_eval or back to a Table with deserialize(). Intended for the summary but can be used wherever. Parameters ---------- None Returns ------- A string representation of the Table that can be converted back to a Table with deserialize(). """ x = dict(self.__dict__) # convert the numpy array to a Python list # so we can write it as a string. x["data"] = x["data"].tolist() return str(x)
[docs] def deserialize(self,serial): """Create a Table from serialized data created by serialize(). Parameters ---------- serial : The string representation of a Table in the format from deserialize(). Returns ------- None """ # Do not convert directly to self.__dict__ because # the Table structure may change between versions, e.g. # an attribute may be added or deleted. # So we can only safely convert attributes which are valid # for this Table instance. x = ast.literal_eval(serial) # convert from Python list to numpy array x["data"] = np.array(x["data"]) for i in self.__dict__: if i in x: self.__dict__[i] = x[i]
def __eq__(self, table): """Define equivalency for two Tables Parameters ---------- table : Table The Table to compare to this one. Returns ------- Boolean Whether or not the two Tables are identical """ if(not isinstance(table,self.__class__)): return False try: for i in self.__dict__: if i == "data": #np.allclose is only for numeric types! #if not np.allclose(self.data, table.data): # ideally we want np.array_equal(self.data,table.date), # but for numpy version <1.8, np.array_equal can't # compare strings. So for now, use np.all(x==y). if not np.all(self.data == table.data): return False continue if cmp(getattr(self, i), getattr(table, i)) != 0: return False except: return False return True def __len__(self): """The Table length is defined as the number of rows in the table not including the column header and units, i.e. the number of data rows. Returns ------- int number of data rows in the table. """ # The shape tuple is (planes,rows,columns) for 3D # but (rows,columns) for 2D # If the data array is empty then the tuple value is # (0,) -- so we must check shape[0] to see if the Table is # empty if self.data.shape[0] == 0: return 0 # if len(self.data.shape) == 2: return self.data.shape[0] elif len(self.data.shape) == 3: return self.data.shape[1] else: return 0