Source code for admit.util.peakfinder.PeakUtils

""" .. _peakutils:

    PeakUtils --- A peak finding algorithm.
    ---------------------------------------

    A version of the PyPi PeakUtils, converted from Python3.4 to Python2.7.
"""

# The MIT License (MIT)
#
# Copyright (c) 2014 Lucas Hermann Negri
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


from __future__ import division
from __future__ import absolute_import
import numpy as np
import math
import types
import matplotlib.pyplot as plt
try:
  from scipy import optimize
  import scipy.linalg as LA
except:
  print "WARNING: No scipy; PeakUtils utility cannot function."



[docs]class PeakUtils(object):
    """ PeakUtils peak finding algorithm

        Parameters
        ----------
        spec : List or numpy array
            The spectrum to be analyzed.

        x : List or numpy array, optional
            The x co-ordinates for the spectrum.
            Default = None.

        kwarg : Dict
            Any additional arguments, see the Attributes list for a complete
            listing.

        Attributes
        ----------
        spec : numpy array
            The spectrum to be analyzed.

        x : numpy array
            The x co-ordinates of the spectrum.

        thres : float
            Threshold for detecting a peak/valley. The absolute value of the intensity
            must be above this value. Default: 0.0.

        min_dist : int
            The minimum distance between peaks in channels.
            Deafult: 5.

        profile : str
            The spectral line profile to use when refining the fits. Choices are
            "gaussian" and "lorentzian".
            Default: "gaussian".

        width : int
            The number of channels on either side of a peak to use when refining
            the fit.
            Default: 10.

        basedeg : int
            Degree of the polynomial that will estimate the data baseline.
            Default: 3.

        baseiter : int
            The maximum number of iterations to perform when trying to fit the
            baseline.
            Default: 100.

        basetol : float
            Tolerance to use when comparing the difference between the current
            fit coefficient and the ones from the last iteration.
            Default: 1e-3.

        dobase : boolean
            Whether or not to do baseline/continuum subtraction.
            Default: False.
    """
    thres = 0.0
    min_dist = 1
    profile = "gaussian"
    width = 10
    basedeg = 3
    baseiter = 100
    basetol = 1e-3
    dobase = False
    def __init__(self,spec,x = None,**kwarg):
        if type(spec) == types.ListType:
            self.spec = np.array(spec,dtype=float)
        else:
            self.spec = spec.astype(float)
        if x == None:
            self.x = np.arange(float(spec.shape[0]))
        else:
            if type(x) == types.ListType:
                self.x = np.array(x,dtype=float)
            else:
                self.x = x.astype(float)
        for k,v, in kwarg.iteritems():
            # ingore any attributes we don't have
            if hasattr(self,k):
                if type(getattr(self,k)) != type(v):
                    raise Exception,"Cannot change the type of a variable in PeakUtils. %s is of type %s, not %s." % (k,type(getattr(self,k)),type(v))
                setattr(self,k,v)
        #print self.min_dist

[docs]    def find(self):
        """ Method to find any peaks in the spectrum. A baseline will be subtracted first if requested.

            Parameters
            ----------
            None

            Returns
            -------
            numpy array of floats
                containing the locations of the peaks
        """
        # subtract a baseline if needed
        #fig = plt.figure(20)
        #x = np.arange(len(self.spec))
        #ax1 = fig.add_subplot(1,1,1)
        #ax1.set_title("BEFORE")
        #ax1.plot(x,self.spec,"-")
        #plt.show()

        if self.dobase:
            self.baseline(self.basedeg,self.baseiter,self.basetol)
        #fig2 = plt.figure(21)
        #x = np.arange(len(self.spec))
        #ax2 = fig2.add_subplot(1,1,1)
        #ax2.set_title("AFTER")
        #ax2.plot(x,self.spec,"-")
        #plt.show()

        # do an initial cut
        ind = self.indexes(self.thres,self.min_dist)
        # now refine the peaks
        if "gauss" in self.profile:
            fit = "gaussian_fit"
        else:
            fit = "lorentzian_fit"
        refined = self.interpolate(ind,self.width,fit)
        return refined

[docs]    def indexes(self,thres=0.0, min_dist=5):
        """Peak detection routine.

            Finds the peaks in *y* by taking its first order difference. By using
            *thres* and *min_dist* parameters, it is possible to reduce the number of
            detected peaks.

            Parameters
            ----------
            thres : float
                Threshold for detecting a peak/valley. The absolute value of the intensity
                must be above this value. Default: 0.0

            min_dist : int
                Minimum distance between each detected peak. The peak with the highest
                amplitude is preferred to satisfy this constraint.

            Returns
            -------
            ndarray
                Array containing the indexes of the peaks that were detected
        """
        #thres *= np.max(self.spec) - np.min(self.spec)

        # find the peaks by using the first order difference
        dy = np.diff(self.spec)
        peaks = np.where((np.hstack([dy, 0.]) < 0.)
                         & (np.hstack([0., dy]) > 0.)
                         & (np.absolute(self.spec) > thres))[0]
        #print "\n\n",peaks,peaks.size,"\n\n"
        if peaks.size > 1 and min_dist > 1:
            highest = peaks[np.argsort(self.spec[peaks])][::-1]
            rem = np.ones(self.spec.size, dtype=bool)
            rem[peaks] = False

            for peak in highest:
                if not rem[peak]:
                    sl = slice(max(0, peak-min_dist), peak+min_dist+1)
                    rem[sl] = True
                    rem[peak] = False

            peaks = np.arange(self.spec.size)[~rem]

        #print "\n\n",peaks,peaks.size,"\n\n"

        return peaks


[docs]    def centroid(self,chans=[0,-1]):
        """ Computes the centroid for the specified data.

            Parameters
            ----------
            chans : list
                The indexes to start and end at.
                Default: [0,-1]

            Returns
            -------
            float
                Centroid of the data.
        """
        ydata = self.spec[chans[0]:chans[1]+1]
        xdata = self.x[chans[0]:chans[1]+1]
        return np.sum(xdata*ydata)/np.sum(ydata)


[docs]    def gaussian(self,x, ampl, center, dev):
        """Computes the Gaussian function.

            Parameters
            ----------
            x : float
                Point to evaluate the Gaussian for.

            ampl : float
                Amplitude.

            center : float
                Center.

            dev : float
                Width.

            Returns
            -------
            float
                Value of the specified Gaussian at *x*
        """
        return ampl * np.exp(-(x-center)**2 / (2*dev**2))


[docs]    def gaussian_fit(self,chans=[0,-1]):
        """ Performs a Gaussian fitting of the specified data.

            Parameters
            ----------
            chans : list
                The indexes to start and end at.
                Default: [0,-1]

            Returns
            -------
            ndarray
                Parameters of the Gaussian that fits the specified data
        """
        ydata = self.spec[chans[0]:chans[1]+1]
        xdata = self.x[chans[0]:chans[1]+1]
        initial = [np.max(ydata), xdata[0], (xdata[1]-xdata[0])*5]
        _3to2list = list(optimize.curve_fit(self.gaussian, xdata, ydata, initial))
        params, _, = _3to2list[:1] + [_3to2list[1:]]
        return params[1]

[docs]    def lorentzian(self,x, ampl, center, width):
        """Computes the Lorentzian function.

            Parameters
            ----------
            x : float
                Point to evaluate the Gaussian for.

            ampl : float
                Amplitude.

            center : float
                Center.

            width : float
                Width.

            Returns
            -------
            float
                Value of the specified Gaussian at *x*
        """
        return (ampl/math.pi) * (width/2.0)/(math.pow(x-center,2) + math.pow(width/2.0,2))


[docs]    def laurentzian_fit(self,chans=[0,-1]):
        """ Performs a Lorentzian fitting of the specified data.

            Parameters
            ----------
            chans : list
                The indexes to start and end at.
                Default: [0,-1]

            Returns
            -------
            ndarray
                Parameters of the Lorentzian that fits the specified data
        """
        ydata = self.spec[chans[0]:chans[1]+1]
        xdata = self.x[chans[0]:chans[1]+1]
        initial = [np.max(ydata), xdata[0], (xdata[1]-xdata[0])*5]
        _3to2list = list(optimize.curve_fit(self.lorentzian, xdata, ydata, initial))
        params, _, = _3to2list[:1] + [_3to2list[1:]]
        return params[1]

[docs]    def interpolate(self, ind=None, width=10, func="gaussian_fit"):
        """ Tries to enhance the resolution of the peak detection by using
            Gaussian fitting, centroid computation or an arbitrary function on the
            neighborhood of each previously detected peak index.

            Parameters
            ----------
            ind : ndarray
                Indexes of the previously detected peaks. If None, indexes() will be
                called with the default parameters.

            width : int
                Number of points (before and after) each peak index to pass to *func*
                in order to increase the resolution in *x*.

            func : function(x,y)
                Function that will be called to detect an unique peak in the x,y data.

            Returns
            -------
            ndarray :
                Array with the adjusted peak positions (in *x*)
        """
        if ind is None:
            ind = indexes(self.spec)

        out = []
        for i in ind:
            try:
                fit = getattr(self,func)([i-width,i+width+1])
                out.append(fit)
            except:
                pass

        return np.array(out)

[docs]    def baseline(self,deg=3, max_it=100, tol=1e-3):
        """ Computes the baseline of a given data.

            Iteratively performs a polynomial fitting in the data to detect its
            baseline. At every iteration, the fitting weights on the regions with
            peaks is reduced to identify the baseline only.

            Parameters
            ----------
            deg : int
                Degree of the polynomial that will estimate the data baseline. A low
                degree may fail to detect all the baseline present, while a high
                degree may make the data too oscillatory, especially at the edges.

            max_it : int
                Maximum number of iterations to perform.

            tol : float
                Tolerance to use when comparing the difference between the current
                fit coefficient and the ones from the last iteration. The iteration
                procedure will stop when the difference between them is lower than
                *tol*.

            Returns
            -------
            ndarray
                Array with the baseline amplitude for every original point in *y*
        """
        order = deg+1
        coeffs = np.ones(order)

        # try to avoid numerical issues
        cond = math.pow(self.spec.max(), 1./order)
        x = np.linspace(0., cond, self.spec.size)
        base = self.spec.copy()

        vander = np.vander(x, order)
        vander_pinv = LA.pinv2(vander)

        for _ in xrange(max_it):
            coeffs_new = np.dot(vander_pinv, self.spec)

            if LA.norm(coeffs_new-coeffs) / LA.norm(coeffs) < tol:
                break

            coeffs = coeffs_new
            base = np.dot(vander, coeffs)
            self.spec = np.minimum(self.spec, base)

        return base

[docs]    def scale(self,x, new_range=(0., 1.), eps=1e-9):
        """ Changes the scale of an array

            Parameters
            ----------
            x : ndarray
                1D array to change the scale (remains unchanged)

            new_range : tuple (float, float)
                Desired range of the array

            eps: float
                Numerical precision, to detect degenerate cases (for example, when
                every value of *x* is equal)

            Returns
            -------
            ndarray
                Scaled array
            tuple (float, float)
                Previous data range, allowing a rescale to the old range
        """
        assert new_range[1] >= new_range[0]
        range_ = (x.min(), x.max())

        if (range_[1] - range_[0]) < eps:
            mean = (new_range[0] + new_range[1]) / 2.0
            xp = np.full(x.shape, mean)
        else:
            xp = (x - range_[0])
            xp *= (new_range[1] - new_range[0]) / (range_[1] - range_[0])
            xp += new_range[0]

        return xp, range_