""" .. _admitsegment:
ADMITSegmentFinder --- Finds segments of emission within a spectrum.
--------------------------------------------------------------------
This module defines the class for spectral line segment detection.
"""
# system imports
import math
import numpy as np
import numpy.ma as ma
# ADMIT imports
from admit.util import stats
from admit.util import utils
from admit.util import Segments
from admit.util.AdmitLogging import AdmitLogging as logging
[docs]class ADMITSegmentFinder(object):
"""Define segments of 'line' emission where segments from data appear to be
above a threshold. This routine comes out of ASTUTE, some parameters below
are hardcoded.
Parameters
----------
freq : float array
Frequencies, GHz.
spec : float array
The y component of the spectrum (arbitrary units, could be linear,
could be log).
f : float
Robustness parameter [1.5].
pmin : float
Signal above cuttoff, rmean+pmin*rstd.
minchan : int
Minimum number of channels needed for a line.
maxgap : int
Allowed channels with signal below cutoff [0].
nomean : bool
Remove the mean from any noise and cutoff calculations. This is useful
if PVCorr based spectra are being processed to get a correct noise level.
Default: False.
Returns
-------
4-tuple
channel_segments[], cutoff, noise, mean
"""
###
### @todo symmetrize the attributes with the vals[]list ? -> spec and nsigma not set
###
def __init__(self, **keyval):
self.peak = None ##
self.f = 1.5 ## standard robust
self.hanning = False # @todo not in vals[]
self.bottom = True ##
self.abs = False ##
self.loglevel = 20 # @todo why it's own loglevel? do we still need this
self.pvc = False ##
self.freq = [] ##
# self.spec = [] ##
self.maxgap = 0 ## 0 the default ?
self.minchan = 3 ##
self.pmin = 0.0 ## 0.0 the default ?
self.nomean = False ## this is the csub parameter in e.g. LineID, another beautiful double negative here
self.noise = None ##
# self.nsigma ## is this pmin? shouldn't nsigma be gone from the vals[] list?
self.vals = ["freq", "spec", "pmin", "peak", "f", "nsigma",\
"minchan", "maxgap", "abs", "pvc", "bottom", "nomean",\
"noise"]
self.set_options(**keyval)
[docs] def line_segments(self, spec, cutoff):
""" Method to find segments that where lines are located
[ [start1,end1], [start2,end2], ....]
This function assumes you've subtracted a possible continuum (see the nomean
parameter), and applied the absolute value, because this routine only
segments above the cutoff.
Need some explanation on the use of abs here.
Parameters
----------
spec : numpy array (with a mask)
The spectrum to analyze
cutoff : float
The cutoff to use (line segments must be higher than this value)
Returns
-------
List of segment start and end channels, e.g. [[10,20],[30,40]]
"""
def index(w, start, value):
""" Method to find the index of a specific value, starting at
a specified index
w : list
The list to search for the value
start : int
Index to start searching at
value :
The value to search for
Returns
-------
Int containing the matching index, or -1 if not found.
"""
#@todo
# how about something cleaner:
# try:
# return w[start:].index(value)+start
# except ValueError:
# return -1
n = len(w)
i = start
while i < n:
if w[i] == value:
return i
i = i + 1
return -1
def setval(w, start, end, value):
""" Method to set a range of indices to a specific value
Parameters
----------
w : list
The list to modify
start : int
The index to start at
end : int
The index to end at
value : varies
The value to insert into to specified indicies
Returns
-------
None
"""
for i in range(start, end):
w[i] = value
# -- start of line_segment --
#print "PJT line_segment: ",spec.min(),spec.max(),cutoff,self.minchan,self.maxgap,self.abs
s = [] # accumulate segments
n = len(spec)
w = range(n) # @todo use masking operations, so no loops are needed. this now should work though.
if True:
# here's the old one
w1 = [-1] * n
for i in range(n):
if not self.abs and abs(spec[i]) < cutoff:
w1[i] = 0
elif self.abs and spec[i] < cutoff:
w1[i] = 0
else:
if spec.mask[i]:
w1[i] = 0
else:
w1[i] = 1
#print "PJTW1:",w1
if False:
# here's the new one
w2 = [-1] * n
if self.abs:
for i in range(n):
if spec.mask[i]:
w2[i] = 0
continue
if abs(spec[i]) < cutoff:
w2[i] = 0
else:
w2[i] = 1
else:
for i in range(n):
if spec.mask[i]:
w2[i] = 0
continue
if spec[i] < cutoff:
w2[i] = 0
else:
w2[i] = 1
sum0 = abs(np.array(w2)-np.array(w1)).sum()
#print "PJTW2:",w2,sum0
# pick your method (w1 = original, w2 = peter's )
w = w1
#
i0 = 0
while i0 >= 0:
i1 = index(w, i0, 1)
if i1 < 0:
break
t = index(w, i1 + 1, 1)
if t - i1 > 1:
i0 = i1 + 1
continue
i2 = index(w, i1, 0)
if i2 < 0:
break
i3 = index(w, i2, 1)
if i3 < 0:
il = i2 - i1
if il >= self.minchan:
s.append([i1, i2 - 1])
break
i4 = index(w, i3, 0)
if i4 < 0:
i4 = n - 1
#
ig = i3 - i2
if (ig <= self.maxgap and i4 - i3 > self.minchan) or ig <= 1:
# fill the gap, it's small
setval(w, i2, i3, 1)
i0 = i1
continue
else:
il = i2 - i1
if il >= self.minchan:
s.append([i1, i2 - 1])
i0 = i2
continue
return s
[docs] def set_options(self, **keyval):
""" Set the options for the line finding algorithm.
Parameters
----------
freq : float array
Frequencies, GHz.
spec : float array
The y component of the spectrum (arbitrary units, could
be linear, could be log)
f : float
Robustness parameter [1.5]
pmin : float
Signal above cuttoff, rmean+pmin*rstd
minchan : int
Minimum number of channels needed for a line
maxgap : int
Allowed channels with signal below cutoff [0]
"""
for v in self.vals:
try:
setattr(self, v, keyval[v])
except KeyError:
pass
[docs] def find(self):
""" Method that does the segment finding
Parameters
----------
None
Returns
-------
Tuple containing a list of the segments, the cutoff used, the
noise level, and a mean baseline.
"""
if self.abs:
self.spec = abs(self.spec)
temp = np.zeros(len(self.spec))
#self.see = ma.masked_array(temp, mask=self.spec.mask)
# parameters (some now from the function argument)
logging.debug("MIN/MAX " + str(self.spec.min()) + " / " +\
str(self.spec.max()))
n = len(self.spec) # data and freq assumed to be same size
if self.hanning:
h = np.hanning(5) / 2 # normalize the convolution array
h = np.array([0.25, 0.5, 0.25])
data2 = np.convolve(self.spec, h, 'same')
else:
data2 = self.spec
if len(data2) != len(self.freq):
raise Exception("ulines: data2 and freq not same array")
# do the work
dr = stats.robust(data2, self.f)
noise = dr.std()
logging.debug("ROBUST: (mean/median/noise) " + \
str(dr.mean()) + " / " + str(ma.median(dr)) + " / " + str(noise))
#print "\n\nD2",data2,"\n"
data3 = ma.masked_invalid(data2)
#print "\n\nD3\n",data3,"\n"
ddiff = data3[1:n] - data3[0:n-1]
logging.debug("DIFF: (mean/stdev) " + str(ddiff.mean()) +\
" / " + str(ddiff.std()))
#print "\n\n",ddiff,"\n",self.f,"\n"
ddr = stats.robust(ddiff, self.f)
logging.debug("RDIFF: (mean/median/stdev) " + \
str(ddr.mean()) + " / " + str(ma.median(ddr)) + " / " + \
str(ddr.std()))
#plt.show()
if self.bottom:
# first remind the classic
dmean1 = dr.mean()
dstd1 = dr.std()
logging.debug("CLASSIC MEAN/SIGMA: " + str(dmean1) + \
" / " + str(dstd1))
# see if we can find a better one?
# k should really depend on nchan, (like an nsigma), 2-3 should be ok for most.
k = 2.5
dmin = dr.min()
dmax = dr.min() + 2 * k * ddr.std() / 1.414214
logging.debug("DMIN/DMAX: " + str(dmin) + " / " + \
str(dmax))
dm = ma.masked_outside(dr, dmin, dmax)
dmean = max(0.0, dm.mean()) # ensure that the mean is positive or 0.0
dstd = dm.std()
if self.noise is not None:
cutoff = self.pmin * self.noise
elif self.nomean:
cutoff = self.pmin * dstd
else:
cutoff = dmean + self.pmin * dstd
logging.debug("BETTER MEAN/SIGMA: " + str(dmean) + \
" / " + str(dstd))
else:
# classic simple, but fails when robust didn't kill off (enough of) the signal
# sigma will be too high, cutoff too high and you could have no lines if there
# is one strong lines
dmean = dr.mean()
dstd = dr.std()
if self.noise is not None:
cutoff = self.pmin * self.noise
elif self.nomean:
cutoff = self.pmin * dstd
else:
cutoff = dmean + self.pmin * dstd
logging.debug("CLASSIC MEAN/SIGMA: " + str(dmean) + \
" / " + str(dstd))
logging.debug("SEGMENTS: f=%g pmin=%g maxgap=%d minchan=%d" % \
(self.f, self.pmin, self.maxgap, self.minchan))
#print "\nDATA\n\n",data2,"\n\n"
segments = self.line_segments(data2, cutoff)
#print "SEGMENTS",segments
nlines = len(segments)
logging.debug("Found %d segments above cutoff %f" % \
(nlines, cutoff))
segp = []
rmax = data2.max() + 0.1 # + 0.05*(data2.max()-data2.min())
segp.append([self.freq[0], self.freq[n - 1], cutoff, cutoff])
segp.append([self.freq[0], self.freq[n - 1], dmean, dmean])
for (l, s) in zip(range(nlines), segments):
ch0 = s[0]
ch1 = s[1]
sum0 = sum(data2[ch0:ch1+1])
sum1 = sum(self.freq[ch0:ch1+1] * data2[ch0:ch1+1])
sum2 = sum(self.freq[ch0:ch1+1] * self.freq[ch0:ch1+1] * data2[ch0:ch1+1])
lmean = sum1 / sum0
# this fails for weaker lines, so wrapped it in a abs
lsigma = math.sqrt(abs(sum2 / sum0 - lmean * lmean))
lmax = max(data2[ch0:ch1+1])
if self.peak != None:
lpeak = 1000*max(self.peak[ch0:ch1+1])
else:
lpeak = max(self.spec[ch0:ch1+1])
# @todo if we ever allow minchan=1 lsigma would be 0.0.... should we adopt channel width?
lfwhm = 2.355 * lsigma / lmean * utils.c
logging.debug(
"Line in %2d channels %4d - %4d @ %.4f GHz +/- %.4f GHz log(S/N) = %.2f FWHM %5.1f km/s %.2f" % \
(ch1 - ch0 + 1, ch0, ch1, lmean, lsigma, lmax, lfwhm, lpeak))
segp.append([self.freq[ch0], self.freq[ch1], rmax, rmax])
segp.append([lmean, lmean, rmax - 0.1, rmax + 0.05])
return Segments.Segments(segments, nchan=len(self.spec)), cutoff, dstd, dmean