Source code for admit.util.Splatalogue

""" .. _splatalogue:

    **Splatalogue** --- Module for performing splatalogue queries.
    --------------------------------------------------------------

    Module to search Splatalogue.net via splat, modeled loosely on
    ftp://ftp.cv.nrao.edu/NRAO-staff/bkent/slap/idl/
    :author: Adam Ginsburg <adam.g.ginsburg@gmail.com>
    :updated for CASA:  Brian Kent <bkent@nrao.edu>
    :updated for ADMIT: Douglas Friedel <friedel@illinois.edu>

    Dependencies for python requests and astropy modules have been removed.
"""
import urllib
import urllib2
import json
import re

from admit.util.AdmitLogging import AdmitLogging as logging

column_headings_map = {'Log<sub>10</sub> (A<sub>ij</sub>)': 'log10(Aij)',
                       'E_U (K)': 'EU_K',
                       'Resolved QNs': 'QNs',
                       'S<sub>ij</sub>': 'Sij', }


[docs]def clean_column_headings(header, renaming_dict=column_headings_map): """ Rename column headings to shorter version that are easier for display on-screen / at the terminal Parameters ---------- header : dict A dictionary of the header keywords renaming_dict : dict A dictionary containing the old names (as the key) and the new names (as the value) Returns ------- The new header. """ for key in renaming_dict: header = [renaming_dict[key] if i == key else i for i in header] return header
[docs]class SpeciesLookuptable(dict): """ Simple class that extends the dictionary class so that keys can be searched """
[docs] def find(self, s, flags=0, return_dict=True): """ Search dictionary keys for a regex match to string s Parameters ---------- s : str String to compile as a regular expression return_dict : bool Return a dictionary if true or just the matching values if false. Default: True flags : int re (regular expression) flags. Default: 0 Returns ------- Subset of parent dictionary if return_dict, else list of values corresponding to matches """ R = re.compile(s, flags) out = SpeciesLookuptable(dict((k, v) for k, v in self.items() if R.search(k))) if return_dict: return out else: return out.values()
[docs]def species_lookuptable(filename='species.json'): """ Method to read in a file and turn it into a searchable dictionary Parameters ---------- filename : str The name of the file to read. Default: 'species.json' Returns ------- A lookup table with the contents of the input file """ with open(filename, 'r') as f: J = json.load(f) lookuptable = SpeciesLookuptable(dict((v, k) for d in J.values() for k, v in d.items())) return lookuptable
[docs]class Splatalogue(object): """ Class for searching for lines in the online Splatalogue database. The resulting Splatalogue tables can be returned as a list of dictionaries. Parameters ---------- kwargs : dict The keyword value pairs of the arguments, possible keywords are: **min_frequency** : float Lower limit of frequency search range in GHz. Default: 0.0 **max_frequency** : float Upper limit of frequency search range in GHz. Defualt: 100000.0 **band** : str The frequency band to search in. 'min/max_frequency' and 'band' are mutually exclusive. If both are given then 'band' is preferred. Possibles are "any": All bands "alma3": ALMA Band 3 (84-116 GHz) "alma4": ALMA Band 4 (125-163 GHz) "alma5": ALMA Band 5 (163-211 GHz) "alma6": ALMA Band 6 (211-275 GHz) "alma7": ALMA Band 7 (275-373 GHz) "alma8": ALMA Band 8 (385-500 GHz) "alma9": ALMA Band 9 (602-720 GHz) "alma10": ALMA Band 10 (787-950 GHz) "pf1": GBT PF1 (0.29-0.92 GHz) "pf2": GBT PF2 (0.91-1.23 GHz) "l": GBT/VLA L (1-2 GHz) "s": GBT/VLA S (1.7-4 GHz) "c": GBT/VLA C (3.9-8 GHz) "x": GBT/VLA X (8-12 GHz) "ku": GBT/VLA Ku (12-18 GHz) "kfpa": GBT KFPA (18-27.5 GHz) "k": VLA K (18-26.5 GHz) "ka": GBT/VLA Ka (26-40 GHz) "q": GBT/VLA Q (38-50 GHz) "w": GBT W (67-93.3 GHz) "mustang": GBT Mustang (80-100 GHz) Default: 'any' **top20** : list Limit results to the given top20 list(s). Possibles are: 'comet': molecules most likely found in comets 'planet': molecules most likely planetary atmospheres 'top20': most common molecules 'ism_hotcore': molecules most likely found in hot cores 'ism_darkcloud': molecules most likely found in dark clouds 'ism_diffusecloud': molecules most likely found in diffuse clouds Default: None **chemical_name** : str Limit results to the specified chemical, by name. Default: '' **line_lists** : list Which line list(s) to use for the search. Possibles are: 'Lovas': Search the Lovas/NIST list of molecules 'SLAIM': Search the SLAIM list of molecules 'JPL': Search the Jet Propulsion Lab list of molecules 'CDMS': Search the Cologne database of molecules 'ToyoMA': Search the ToyoMA list of moleules 'OSU': Search the Ohio State list of molecules 'Recomb': Search for recombination lines 'Lisa': Search the TopModel list 'RFI': Search for radio frequency interference lines Default: ['Lovas', 'SLAIM', 'JPL', 'CDMS', 'ToyoMA', 'OSU', 'Recomb', 'Lisa', 'RFI'] **line_strengths** : list Which line strength units should be reported, several can be given. Possibles are: 'ls1': JPL/CDMS 'ls2': Sijmu^2 'ls3': Sij 'ls4': Aij 'ls5': Lovas/SLAIM Default: ('ls1', 'ls2', 'ls3', 'ls4', 'ls5') **energy_levels** : list Which energies should be reported. Possibles are: 'el1': Lower state in cm^-1 'el2': Lower state in K 'el3': Upper state in cm^-1 'el4': Upper state in K Default: ['el1', 'el2', 'el3', 'el4'] **exclude** : list Which types of molecules to exclude from the search. Possibles are: 'potential': Species that may be found in the ISM 'atmospheric': Species know to be in the Earth's atmosphere 'probable': Species that might be found in the ISM Default: ['potential', 'atmospheric', 'probable'] **version** : str Which Splatalogue interface is being used. Default: 'v2.0' **only_NRAO_recommended** : bool Only return lines that are recommended by NROA. Can eliminate duplicate lines from multiple databases. Default: False **export** : bool Wherther splatalogue should return an export formatted list rather than HTML. Default: True **export_limit** : int The maximum number of lines to return. Default: 10000 **noHFS** : bool Whether or not to suppress hyperfine components in the results. Default: False **displayHFS** : bool Whether or not to return the hyperfine line strength. Default: False **show_unres_qn** : bool Whether or not to return the unresolved quantum numbers. Typically in a machine readable format. Default: False **show_upper_degeneracy** : bool Whether or not th return the upper state degeneracy of each transition. Default: False **show_molecule_tag** : bool Whether or not to return the molecule tag. Default: False **show_qn_code** : bool Whether or not to return the quantum number encoding (the encoding of 'show_unres_qn'). Default: False **show_lovas_labref** : bool Whether or not to return the lab reference frequency from the Lovas/NIST list. Default: False **show_lovas_obsref** : bool Whether or not to return the observational reference from the Loval/NIST list. Default: False **show_orderedfreq_only** : bool Default: False **show_nrao_recommended** : bool Whether or not to return the NRAO recommended rest frequency. Default: False **chem_re_flags** : int Default: 0 **energy_min** : float Minimum lower/upper state energy to search for. Units are specified in 'energy_type'. Default None **energy_max** : float Maximum lower/upper state energy to search for. Units are specified in 'energy_type'. Default None **energy_type** : str The energy units for the 'energy_min' and 'energy_max' columns. Possibles are: 'el_cm1': Lower state in cm^-1 'el_k': Lower state in K 'eu_cm1': Upper state in cm^-1 'eu_k': Upper state in K Default: None **intensity_lower_limit** : float Lower limit for the transition strength, units are specified in 'intensity_type'. Default: None **intensity_type** : str The units for the 'intensity_lower_limit' parameter. Possibles are: 'cdms_jpl': Cologne database/JPL intensity units 'aij': Einstein A units 'sijmu2': Smu^2, line strength times the square of the relevant dipole moment Default: None **transition** : str Seach for a specific transition (i.e. '(1-0)''). Default: None **fel** : bool Whether or not to remove results that have an uncertainty of over 50 MHz. Default: False Attributes ---------- None Examples -------- >>> from splatalogue import Splatalogue >>> sp = Splatalogue #Create an object of type Splatalogue >>> # Query by frequency range in units of GHz >>> sp.query_lines(min_frequency=114.0, max_frequency=116.0) >>> # Query by frequency range and chemical name >>> sp.query_lines(min_frequency=114.0, max_frequency=116.0, chemical_name=" CO ") >>> # Query by frequency range (no keywords needed and chemical name) >>> sp.query_lines(0.1,1000.0, chemical_name='Ethylene Glycol') >>> # Query by line list and energy >>> sp.query_lines(218.40, 218.50, energy_max=300, energy_type='eu_k', line_lists='JPL') >>> # Parse response into a list of dictionaries and store in result >>> result = sp.parse_response() """ #SLAP_URL = 'http://find.nrao.edu/splata-slap/slap' #NOT USED QUERY_URL = 'http://www.cv.nrao.edu/php/splat/c_export.php' TIMEOUT = 60 versions = ('v1.0', 'v2.0') LINES_LIMIT = 10000 ALL_LINE_LISTS = ('Lovas', # Lovas/NIST list http://physics.nist.gov/PhysRefData/ 'SLAIM', # SLAIM List http://physics.nist.gov/cgi-bin/micro/table5/start.pl 'JPL', # JPL list http://spec.jpl.nasa.gov/ 'CDMS', # Cologne database http://www.ph1.uni-koeln.de/vorhersagen/ 'ToyoMA',# Toyama Atlas http://www.sci.u-toyama.ac.jp/phys/4ken/atlas/ 'OSU', # Ohio state list 'Recomb', # recombination lines 'Lisa', # 13 Methyl formate list 'RFI') # RFI lines TOP20_LIST = ('comet', 'planet', 'top20', 'ism_hotcore', 'ism_darkcloud', 'ism_diffusecloud') FREQUENCY_BANDS = {"any":"Any", "alma3":"ALMA Band 3 (84-116 GHz)", "alma4":" ALMA Band 4 (125-163 GHz)", "alma5":" ALMA Band 5 (163-211 GHz)", "alma6":"ALMA Band 6 (211-275 GHz)", "alma7":"ALMA Band 7 (275-373 GHz)", "alma8":"ALMA Band 8 (385-500 GHz)", "alma9":"ALMA Band 9 (602-720 GHz)", "alma10":"ALMA Band 10 (787-950 GHz)", "pf1":"GBT PF1 (0.29-0.92 GHz)", "pf2":"GBT PF2 (0.91-1.23 GHz)", "l":"GBT/VLA L (1-2 GHz)", "s":"GBT/VLA S (1.7-4 GHz)", "c":"GBT/VLA C (3.9-8 GHz)", "x":"GBT/VLA X (8-12 GHz)", "ku":" GBT/VLA Ku (12-18 GHz)", "kfpa":"GBT KFPA (18-27.5 GHz)", "k":"VLA K (18-26.5 GHz)", "ka":" GBT/VLA Ka (26-40 GHz)", "q":"GBT/VLA Q (38-50 GHz)", "w":"GBT W (67-93.3 GHz)", "mustang":"GBT Mustang (80-100 GHz)", } def __init__(self, **kwargs): """ Initialize a Splatalogue query class with default arguments set. Frequency specification is required for *every* query, but any default keyword arguments (see `query_lines`) can be overridden here. """ self.data = self._default_kwargs() self.set_default_options(**kwargs)
[docs] def set_default_options(self, **kwargs): """ Method to modify the default options. Parameters ---------- kwargs : dict See the class description for the key words and their meaning Returns ------- None """ self.data.update(self._parse_kwargs(**kwargs))
[docs] def get_species_ids(self, restr=None, reflags=0): """ Get a dictionary of "species" IDs, where species refers to the molecule name, mass, and chemical composition. Parameters ---------- restr : str String to compile into an re, if specified. Searches table for species whose names match. Default: None reflags : int Flags to pass to `re`. Default: 0 Returns ------- The id of the species """ # loading can be an expensive operation and should not change at runtime: # do it lazily if not hasattr(self, '_species_ids'): self._species_ids = species_lookuptable() if restr is not None: return self._species_ids.find(restr, reflags) else: return self._species_ids
def _default_kwargs(self): kwargs = dict(min_frequency=0.0, #GHz max_frequency=100000.0, #GHz (100000 GHZ = 100 THz) chemical_name='', line_lists=self.ALL_LINE_LISTS, line_strengths=('ls1', 'ls2', 'ls3', 'ls4', 'ls5'), energy_levels=('el1', 'el2', 'el3', 'el4'), exclude=('potential', 'atmospheric', 'probable'), version='v2.0', only_NRAO_recommended=False, export=True, export_limit=self.LINES_LIMIT, noHFS=False, displayHFS=False, show_unres_qn=False, show_upper_degeneracy=False, show_molecule_tag=False, show_qn_code=False, show_lovas_labref=False, show_lovas_obsref=False, show_orderedfreq_only=False, show_nrao_recommended=False,) return self._parse_kwargs(**kwargs) def _parse_kwargs(self, min_frequency=None, max_frequency=None, band='any', top20=None, chemical_name=None, chem_re_flags=0, energy_min=None, energy_max=None, energy_type=None, intensity_lower_limit=None, intensity_type=None, transition=None, version=None, exclude=None, only_NRAO_recommended=None, line_lists=None, line_strengths=None, energy_levels=None, export=None, export_limit=None, noHFS=None, displayHFS=None, show_unres_qn=None, show_upper_degeneracy=None, show_molecule_tag=None, show_qn_code=None, show_lovas_labref=None, show_lovas_obsref=None, show_orderedfreq_only=None, show_nrao_recommended=None, fel=None): # set the payload of the http request payload = {'submit': 'Search', 'frequency_units': 'GHz', } # set the band or frequency ranges if band != 'any': if band not in self.FREQUENCY_BANDS: raise ValueError("Invalid frequency band.") if min_frequency is not None or max_frequency is not None: logging.warning("Band was specified, so the frequency specification is overridden") payload['band'] = band elif min_frequency is not None and max_frequency is not None: if min_frequency > max_frequency: min_frequency, max_frequency = max_frequency, min_frequency #ASSUMES UNITS OF GHz payload['from'] = min_frequency payload['to'] = max_frequency # select the top20 list if given if top20 is not None and len(top20) > 0: if top20 in self.TOP20_LIST: payload['top20'] = top20 else: raise ValueError("Top20 is not one of the allowed values") # set the chemical name if given elif chemical_name in ('', {}, (), [], set()): # include all payload['sid[]'] = [] elif chemical_name is not None: # encode the chemical names to species id's species_ids = self.get_species_ids(chemical_name, chem_re_flags) if len(species_ids) == 0: raise ValueError("No matching chemical species found.") payload['sid[]'] = list(species_ids.values()) # set the min an max energy range and untis if energy_min is not None: payload['energy_range_from'] = float(energy_min) if energy_max is not None: payload['energy_range_to'] = float(energy_max) if energy_type is not None: payload['energy_range_type'] = energy_type # set the minimum intensity and units if intensity_type is not None: payload['lill'] = 'lill_' + intensity_type if intensity_lower_limit is not None: payload[payload['lill']] = intensity_lower_limit # set the specific transition if transition is not None: payload['tran'] = transition # set the version id if version in self.versions: payload['version'] = version elif version is not None: raise ValueError("Invalid version specified. Allowed versions are {vers}".format(vers=str(self.versions))) # set which types to exclude if exclude is not None: for e in exclude: payload['no_' + e] = 'no_' + e # set the NRAO recommended frequencies option if only_NRAO_recommended: payload['include_only_nrao'] = 'include_only_nrao' # set any line lists to use if line_lists is not None and len(line_lists) > 0: if type(line_lists) not in (tuple, list): raise TypeError("Line lists should be a list of linelist names. See Splatalogue.ALL_LINE_LISTS") for L in self.ALL_LINE_LISTS: kwd = 'display' + L if L in line_lists: payload[kwd] = kwd else: payload[kwd] = '' # set which line strengths to return if line_strengths is not None: for LS in line_strengths: payload[LS] = LS # set which energy levels and units to return if energy_levels is not None: for EL in energy_levels: payload[EL] = EL # set other query parameters for b in "noHFS,displayHFS,show_unres_qn,show_upper_degeneracy,show_molecule_tag,show_qn_code,show_lovas_labref,show_orderedfreq_only,show_lovas_obsref,show_nrao_recommended,fel".split(","): if locals()[b]: payload[b] = b # default arg, unmodifiable... payload['jsMath'] = 'font:symbol,warn:0' payload['__utma'] = '' payload['__utmc'] = '' # if we are exporting, set the format if export: payload['submit'] = 'Export' payload['export_delimiter'] = 'colon' # or tab or comma payload['export_type'] = 'current' payload['offset'] = 0 payload['range'] = 'on' if export_limit is not None: payload['limit'] = export_limit else: payload['limit'] = self.LINES_LIMIT return payload def _validate_kwargs(self, min_frequency=None, max_frequency=None, band='any'): """ Check that either min_frequency + max_frequency or band are specified. """ if band == 'any': if min_frequency is None or max_frequency is None: raise ValueError("Must specify either min/max frequency or a valid Band.")
[docs] def query_lines(self, min_frequency=None, max_frequency=None, **kwargs): """ Query Splatalogue for transitions between min_frequency and max_frequency with the given kwargs. Parameters ---------- min_frequency : float The starting frequency for the search in GHz. Default: None max_frequency : float The ending frequency for the search in GHz. Default: None kwargs : dict Any additional arguments for the search. Returns ------- response : `requests.Response` The response of the HTTP request. """ # have to chomp this kwd here... if "min_frequency" in kwargs: min_frequency = kwargs["min_frequency"] if "max_frequency" in kwargs: max_frequency = kwargs["max_frequency"] get_query_payload = (kwargs.pop('get_query_payload') if 'get_query_payload' in kwargs else False) self._validate_kwargs(min_frequency=min_frequency, max_frequency=max_frequency, **kwargs) if hasattr(self, 'data'): data_payload = self.data.copy() data_payload.update(self._parse_kwargs(min_frequency=min_frequency, max_frequency=max_frequency, **kwargs)) else: data_payload = self._default_kwargs() data_payload.update(self._parse_kwargs(min_frequency=min_frequency, max_frequency=max_frequency, **kwargs)) if get_query_payload: return data_payload #Need to pass True so that the sid[] list is handled correctly urlparams = urllib.urlencode(data_payload, True) response = urllib2.urlopen(self.QUERY_URL + '?%s' % urlparams, timeout=self.TIMEOUT) self.response = response return response
[docs] def parse_response(self): """ Parse a response (of type urllib2.urlopen) into a list of dictionaries. Parameters ---------- None Returns ------- A dictonary of the response """ result = [] response = self.response try: csvstring = response.read() tablelist = csvstring.split('\n') header = tablelist[0] headerlist = header.split(':') headerlist = clean_column_headings(headerlist, renaming_dict=column_headings_map) #Populate list of dictionaries for row in tablelist[1:-1]: rowlist = row.split(':') rowdict = {} for i in range(0, len(rowlist)): rowdict[headerlist[i]] = rowlist[i] result.append(rowdict) except: logging.warning("Problem parsing result") self.result = result return result