/* vlfdlib.c

A Python/Numpy C extension module for vLFD, the virtual Laboratory for
Fluorescence Dynamics.

Authors:
  Christoph Gohlke, Laboratory for Fluorescence Dynamics.

Copyright (c) 2007-2008, The Regents of the University of California
Produced by the Laboratory for Fluorescence Dynamics.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of any
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

/*****************************************************************************/
/* setup.py

"""A Python script to build the _vlfdlib extension module.

Usage:: ``python setup.py build_ext --inplace``

"""

from distutils.core import setup, Extension

include_dirs=[
    "C:/Python25/Lib/site-packages/numpy/core/include",
    "/usr/lib/python2.5/site-packages/numpy/core/include",
    "/Library/Frameworks/Python.framework/Versions/2.5/lib/python2.5"
    "/site-packages/numpy/core/include"]

setup(name='__vlfdlib', ext_modules=[Extension('_vlfdlib', ['vlfdlib.c'],
    include_dirs=include_dirs, extra_compile_args=['-O2','-G6'])],)

******************************************************************************/

#define _VERSION_ "20080613"

#define WIN32_LEAN_AND_MEAN

#include "Python.h"
#include "math.h"
#include "float.h"
#include "string.h"
#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"

#ifndef M_PI
#define M_PI (3.1415926535897932384626433832795)
#endif
#define M_2PI (6.283185307179586476925286766559)

#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define SWAP(a,b) { register double t=(a); (a)=(b); (b)=t; }

/*****************************************************************************/
/* C functions

Input validation and error handling is done in the Python wrapper functions.
*/

/*
Fast rounding of floating point to integer numbers.
*/
#ifdef WIN32

int floor_int(double x)
{
    int i;
    const double _f = -0.5f;
    __asm {
        fld x
        fadd st, st(0)
        fadd _f
        fistp i
        sar i, 1
    }
    return i;
}

int ceil_int(double x)
{
    int i;
    const double _f = -0.5f;
    __asm {
        fld x
        fadd st, st(0)
        fsubr _f
        fistp i
        sar i, 1
    }
    return (-i);
}

#else
#define floor_int(x) (int)floor((double)(x))
#define ceil_int(x) (int)ceil((double)(x))
#endif

/*
Multiply quaternions.
*/
static void quaternion_multiply(double *q1, double *q0, double *qq)
{
    double x0 = *q0++;
    double y0 = *q0++;
    double z0 = *q0++;
    double w0 = *q0;
    double x1 = *q1++;
    double y1 = *q1++;
    double z1 = *q1++;
    double w1 = *q1;
    *qq++ = x1*w0 + y1*z0 - z1*y0 + w1*x0;
    *qq++ = y1*w0 + z1*x0 + w1*y0 - x1*z0;
    *qq++ = x1*y0 - y1*x0 + z1*w0 + w1*z0;
    *qq   = w1*w0 - x1*x0 - y1*y0 - z1*z0;
}

/* Convert between rotation matrix and quaternion.

Ken Shoemake. "Quaternions".
JMP van Waveren. "From Quaternion to Matrix and Back", 2005.
*/
static void quaternion_to_matrix(double *q, double *m)
{
    double x = *q++;
    double y = *q++;
    double z = *q++;
    double w = *q;
    double n = x*x + y*y + z*z + w*w;

    if (n == 0.0) {
        /* return identity matrix */
        memset(m, 0, 16*sizeof(double));
        m[0] = m[5] = m[10] = m[15] = 1.0;
        return;
    }

    /* unoptimized code
    {
        double s = (2.0 / n);
        double xs = x*s,  ys = y*s,  zs = z*s;
        double xx = x*xs, xy = x*ys, xz = x*zs;
        double xw = w*xs, yy = y*ys, yz = y*zs;
        double yw = w*ys, zz = z*zs, zw = w*zs;

        *m++ = 1.0-yy-zz; *m++ = xy-zw;     *m++ = xz+yw;     *m++ = 0.0;
        *m++ = xy+zw;     *m++ = 1.0-xx-zz; *m++ = yz-xw;     *m++ = 0.0;
        *m++ = xz-yw;     *m++ = yz+xw;     *m++ = 1.0-xx-yy; *m++ = 0.0;
        *m++ = 0.0;       *m++ = 0.0;       *m++ = 0.0;       *m = 1.0;
    }
    */

    /* normalize quaternion */
    n = 1.0 / sqrt(n);
    x *= n; y *= n; z *= n; w *= n;
    /* optimized FPU code */
    {
        double x2 = x+x;
        double y2 = y+y;
        double z2 = z+z;
        {
            double xx2 = x*x2;
            double yy2 = y*y2;
            double zz2 = z*z2;
            m[0]  = 1.0 - yy2 - zz2;
            m[5]  = 1.0 - xx2 - zz2;
            m[10] = 1.0 - xx2 - yy2;
        }
        {
            double yz2 = y*z2;
            double wx2 = w*x2;
            m[6] = yz2 - wx2;
            m[9] = yz2 + wx2;
        }
        {
            double xy2 = x*y2;
            double wz2 = w*z2;
            m[1] = xy2 - wz2;
            m[4] = xy2 + wz2;
        }
        {
            double xz2 = x*z2;
            double wy2 = w*y2;
            m[8] = xz2 - wy2;
            m[2] = xz2 + wy2;
        }
        m[3] = m[7] = m[11] = m[12] = m[13] = m[14] = 0.0;
        m[15] = 1.0;
    }
}

static void quaternion_from_matrix(double *m, double *q)
{
    double s;
    if ((m[0] + m[5] + m[10]) > 0.0) {
        s = 0.5 / sqrt(m[0] + m[5] + m[10] + m[15]);
        q[3] = 0.25 / s;
        q[2] = (m[4]-m[1]) * s;
        q[1] = (m[2]-m[8]) * s;
        q[0] = (m[9]-m[6]) * s;
    } else if ((m[0] > m[5]) && (m[0] > m[10])) {
        s = 0.5 / sqrt(m[0] - (m[5] + m[10]) + m[15]);
        q[0] = 0.25 / s;
        q[1] = (m[4]+m[1]) * s;
        q[2] = (m[2]+m[8]) * s;
        q[3] = (m[9]-m[6]) * s;
    } else if (m[5] > m[10]) {
        s = 0.5 / sqrt(m[5] - (m[0] + m[10]) + m[15]);
        q[1] = 0.25 / s;
        q[0] = (m[4]+m[1]) * s;
        q[3] = (m[2]-m[8]) * s;
        q[2] = (m[9]+m[6]) * s;
    } else {
        s = 0.5 / sqrt(m[10] - (m[0] + m[5]) + m[15]);
        q[2] = 0.25 / s;
        q[3] = (m[4]-m[1]) * s;
        q[0] = (m[2]+m[8]) * s;
        q[1] = (m[9]+m[6]) * s;
    }
    if (m[15] != 1.0) {
        s = 1.0 / sqrt(m[15]);
        q[0] *= s; q[1] *= s; q[2] *= s;  q[3] *= s;
    }
}

/*
Calculate overlapping normal distributions.
*/
static void
multi_modal_normal(
    long modes,
    double *means,
    double *amplitudes,
    double *variances,
    long size,
    double *xaxis,
    double *result)
{
    double xmin, xmax, m, v, f, x, scale;
    long j, i;
    if (size > 1) {
        scale = (xaxis[size-1] - xaxis[0])/(double)(size-1);
    } else {
        scale = 1.0;
    }
    for(j = 0; j < modes; j++) {
        m = means[j];
        v = -0.5 / variances[j];
        f = amplitudes[j] * scale * sqrt(-v / M_PI);
        xmax = -25.0 / v;
        xmin = m - xmax;
        i = 0;
        /* only consider the mean+-50*variance interval */
        while((i < size) && (xaxis[i] < xmin)) {
            i++;
        }
        while((i < size) && ((x=xaxis[i]-m) < xmax)) {
            result[i++] += f * exp(x*x*v);
        }
    }
}

/*
Convolve instrument response function (kernel) with time-domain decay.
*/
static void
convolve_decay(
    long klen,
    double *kernel,
    long dlen,
    double *data,
    double *convolved)
{
    double k;
    long i = 0;
    long j;

    while(klen && (kernel[--klen] == 0.0))
    while(i < klen && kernel[i] == 0.0) {
        i++;
    }

    for(; i <= klen; i++) {
        k = kernel[i];
        if (k != 0.0) {
            for(j = i; j < dlen; j++) {
                convolved[j] += k * data[j-i];
            }
        }
    }
}

/*
Calculate multi-exponential decay from lifetime distribution.
*/
static void
exponential_decay(
    long numlifs,
    double *lifetimes,
    double *amplitudes,
    long numtimes,
    double *times,
    int folds,
    double *result)
{
    double a, l, t0;
    long i, j, k;

    if (folds < 2) {
        for(j = 0; j < numlifs; j++) {
            a = amplitudes[j];
            l = -lifetimes[j];
            for(i = 0; i < numtimes; i++) {
                result[i] += a*exp(times[i]/l);
            }
        }
    } else {
        for (k = 0; k < folds; k++) {
            t0 = k*(times[numtimes-1]+times[1]);
            for(j = 0; j < numlifs; j++) {
                a = amplitudes[j];
                l = -lifetimes[j];
                for(i = 0; i < numtimes; i++) {
                    result[i] += a*exp((times[i]+t0)/l);
                }
            }
        }
    }
}

/*
Apply Holst exponential smoother.
*/
static void
exponential_smoother(
    long size,
    char *data,
    char *out,
    long dstride,
    long ostride,
    double alpha)
{
    long s = size-1;
    double b = 1.0 - alpha;
    double t = *((double*) data);
    *((double*) out) = t;
    while(s--) {
        data += dstride;
        out += ostride;
        t = b*(*((double*) data)) + alpha*t;
        *((double*) out) = t;
    }
    s = size-1;
    while(s--) {
        out -= ostride;
        t = b*(*((double*) out)) + alpha*t;
        *((double*) out) = t;
    }
}

/* Akima interpolation.

Akima, H. (1970) A new method of interpolation and smooth curve fitting based
on local procedures, J. ACM 17(4), 589-602.
*/

static void
akima_interpolation(
    long si,            /* size of input arrays */
    char *xi, long dxi, /* x coordinates and stride */
    char *yi, long dyi, /* y coordinates and stride */
    long so,            /* size of output arrays */
    char *xo, long dxo, /* x coordinates of output and stride */
    char *yo, long dyo, /* y output coordinates and stride */
    double *p           /* buffer for polynomial coefficients */
                        /* size must be 4*si+4 */
    )
{
    long i, s;
    double x0, x1, x2, x3;       /* extrapolated x values */
    double y0, y1, y2, y3;       /* extrapolated y values */
    double t0, t1, t2, t3;       /* temporary values */
    double d0, d1;
    double g0, g1;               /* gradients at extrapolated values */
    double *p0, *p1, *p2, *p3;   /* buffer pointers. p3 holds slopes */
    char *pxi, *pyi, *pxo, *pyo; /* data pointers */

    p0 = p;
    p1 = p+si+1;
    p2 = p+si*2+2;
    p3 = p+si*3+3;

    /* slopes of input data */
    pxi = xi;
    pyi = yi;
    t0 = *((double*) pxi);
    t1 = *((double*) pyi);
    i = si-1;
    while (i--) {
        pxi += dxi;
        pyi += dyi;
        *p3++ = (*((double*) pyi) - t1) / (*((double*) pxi) - t0);
        t0 = *((double*) pxi);
        t1 = *((double*) pyi);
    }
    p3 = p+si*3+3;

    /* extrapolate 2 points on left side */
    t0 = *((double*) xi);
    t1 = *((double*) (xi+dxi));
    t2 = *((double*) yi);
    t3 = *((double*) (yi+dyi));

    x1 = t0 + t1 - *((double*) (xi+dxi+dxi));
    x0 = x1 + t0 - t1;
    y1 = (t0-x1)*(p3[1]-2.0*p3[0]) + t2;
    g1 = (t2-y1)/(t0-x1);
    y0 = (x1-x0)*(p3[0]-2.0*g1) + y1;
    g0 = (y1-y0)/(x1-x0);

    /* extrapolate 2 points on right side */
    s = (long)xi + dxi*(si-1);
    t0 = *((double*) (s-dxi));
    t1 = *((double*) s);
    x2 = t1 + t0 - *((double*) (s-dxi-dxi));
    x3 = x2 + t1 - t0;

    s = (long)yi + dyi*(si-1);
    t2 = *((double*) (s-dyi));
    t3 = *((double*) s);
    y2 = (2.0*p3[si-2]-p3[si-3])*(x2-t1) + t3;
    p3[si-1] = (y2-t3)/(x2-t1);
    y3 = (2.0*p3[si-1]-p3[si-2])*(x3-x2) + y2;
    p3[si] = (y3-y2)/(x3-x2);

    /* akima slopes */
    t1 = g0;
    t2 = g1;
    t3 = *p3;
    i = si;
    while (i--) {
        t0 = t1;
        t1 = t2;
        t2 = t3;
        t3 = *(p3+1);
        d0 = t3-t2;
        if (d0 < 0.0) d0*=-1.0;
        d1 = t1-t0;
        if (d1 < 0.0) d1*=-1.0;
        if ((d0+d1) < 1e-9) {
            *p3++ = 0.5 * (t1 + t2);
        } else {
            *p3++ = (d0*t1 + d1*t2) / (d0 + d1);
        }
    }
    /* polynomial coefficients */
    pxi = xi;
    pyi = yi;
    t0 = *((double*) pxi);
    t1 = *((double*) pyi);
    p3 = p+si*3+3;
    g1 = *p3++;
    i = si;
    while (i--) {
        pxi += dxi;
        pyi += dyi;
        d0 = (*((double*) pxi) - t0);
        d1 = (*((double*) pyi) - t1);
        t2 = d1/d0;
        g0 = g1;
        g1 = *p3++;
        *p0++ = t1;
        *p1++ = (3.0*t2 - 2.0*g0 - g1) / d0;
        *p2++ = (g0 + g1 - 2.0*t2) / (d0*d0);
        t0 = *((double*) pxi);
        t1 = *((double*) pyi);
    }
    /* interpolate data */
    p0 = p;
    p1 = p + si + 1;
    p2 = p + si*2 + 2;
    p3 = p + si*3 + 3;
    pxi = xi;
    pyi = yi;
    pxo = xo;
    pyo = yo;
    si -= 2;
    i = -1;
    s = so;
    while (s--) {
        t0 = *((double*) pxo);
        while((t0 > *((double*) pxi)) && (i < si)) {
            pxi += dxi;
            i++;
        }
        if (i < 0) {
            i = 0;
            pxi = xi + dxi;
        }
        t1 = t0 - *((double*) (pxi - dxi));
        *((double*) pyo) = p0[i] + p3[i]*t1 + p1[i]*t1*t1 + p2[i]*t1*t1*t1;
        pyo += dyo;
        pxo += dxo;
    }
}

/*****************************************************************************/
/* Python functions */

/*
Numpy array converters for use with PyArg_Parse functions.
*/
static int
PyDoubleArray_Converter(
    PyObject *object,
    PyObject **address)
{
    PyArrayObject *obj = (PyArrayObject *)object;
    if (PyArray_Check(object) && obj->descr->type_num == PyArray_DOUBLE) {
        *address = object;
        Py_INCREF(object);
        return NPY_SUCCEED;
    } else {
        *address = PyArray_FROM_OTF(object, NPY_DOUBLE, NPY_ALIGNED);
        if (*address == NULL) return NPY_FAIL;
        return NPY_SUCCEED;
    }
}

static int
PyDoubleArray_OutputConverter(
    PyObject *object,
    PyArrayObject **address)
{
    PyArrayObject *obj = (PyArrayObject *)object;
    if (object == NULL || object == Py_None) {
        *address = NULL;
        return NPY_SUCCEED;
    }
    if (PyArray_Check(object) && obj->descr->type_num == PyArray_DOUBLE) {
        *address = (PyArrayObject *)object;
        return NPY_SUCCEED;
    } else {
        PyErr_SetString(PyExc_TypeError,
            "Output array must be an array of type double");
        *address = NULL;
        return NPY_FAIL;
    }
}

static int
PyIntArray_Converter(
    PyObject *object,
    PyObject **address)
{
    PyArrayObject *obj = (PyArrayObject *)object;
    if (PyArray_Check(object) && obj->descr->type_num == PyArray_INT32) {
        *address = object;
        Py_INCREF(object);
        return NPY_SUCCEED;
    } else {
        *address = PyArray_FROM_OTF(object, NPY_INT32, NPY_ALIGNED);
        if (*address == NULL) return NPY_FAIL;
        return NPY_SUCCEED;
    }
}

/*
Python function wrapper for the quaternion_from_matrix() function.
*/
char py_quaternion_from_matrix_doc[] =
    "Return quaternion from rotation matrix.";

static PyObject *
py_quaternion_from_matrix(
    PyObject *obj,
    PyObject *args)
{
    PyObject *omat=NULL;
    PyArrayObject *mat=NULL, *result=NULL;
    int dims = 4;

    if (!PyArg_ParseTuple(args, "O", &omat)) return NULL;

    mat = (PyArrayObject *)PyArray_FROM_OTF(omat, NPY_DOUBLE, NPY_IN_ARRAY);

    /* input type checking */
    if (!mat) goto _fail;

    if (PyArray_ISCOMPLEX(mat)){
        PyErr_Format(PyExc_TypeError, "matrix can't be complex.");
        goto _fail;
    }
    if ((PyArray_NDIM(mat) != 2)) {
        PyErr_Format(PyExc_ValueError,
            "matrix must be two dimensional.");
        goto _fail;
    }
    if (PyArray_SIZE(mat) != 16) {
        PyErr_Format(PyExc_ValueError, "matrix must be 3x3 or 4x4");
        goto _fail;
    }

    result = (PyArrayObject*)PyArray_SimpleNew(1, &dims, NPY_DOUBLE);

    /* C function call */
    quaternion_from_matrix(
        (double *)PyArray_DATA(mat),
        (double *)PyArray_DATA(result));

    Py_DECREF(mat);
    return PyArray_Return(result);

_fail:
    Py_XDECREF(mat);
    return NULL;
}

/*
Python function wrapper for the quaternion_to_matrix() function.
*/
char py_quaternion_to_matrix_doc[] =
    "Return rotation matrix from quaternion.";

static PyObject *
py_quaternion_to_matrix(
    PyObject *obj,
    PyObject *args)
{
    PyObject *oquat=NULL;
    PyArrayObject *quat=NULL, *result=NULL;
    int dims[2]; dims[0] = dims[1] = 4;

    if (!PyArg_ParseTuple(args, "O", &oquat)) retur