/* psf.c

A Python/Numpy C extension module for calculating point spread functions.
Refer to the associated psf.py module for documentation.

Authors:
  Christoph Gohlke, Laboratory for Fluorescence Dynamics.
  Oliver Holub, Laboratory for Fluorescence Dynamics.

Copyright (c) 2007-2008, The Regents of the University of California
Produced by the Laboratory for Fluorescence Dynamics.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of any
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

/*****************************************************************************/
/* setup.py

"""A Python script to build the _psf extension module.

Usage:: ``python setup.py build_ext --inplace``

"""

from distutils.core import setup, Extension

include_dirs=[
    "C:/Python25/Lib/site-packages/numpy/core/include",
    "/usr/lib/python2.5/site-packages/numpy/core/include",
    "/Library/Frameworks/Python.framework/Versions/2.5/lib/python2.5"
    "/site-packages/numpy/core/include"]

setup(name='_psf', ext_modules=[Extension('_psf', ['psf.c'],
    include_dirs=include_dirs, extra_compile_args=['-O2','-G6'])],)

******************************************************************************/

#define _VERSION_ "20080502"

#define WIN32_LEAN_AND_MEAN

#include "Python.h"
#include "math.h"
#include "float.h"
#include "string.h"
#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"

#ifndef M_PI
#define M_PI (3.1415926535897932384626433832795)
#endif
#define M_2PI (6.283185307179586476925286766559)

#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define SWAP(a,b) { register double t=(a); (a)=(b); (b)=t; }

#define BESSEL_LEN 1001 /* length of Bessel function lookup table */
#define BESSEL_RES 10.0 /* resolution of Bessel function lookup table */
#define BESSEL_INT 60   /* steps for integrating the Bessel integral */

/* lookup table for Bessel function of the first kind, orders 0, 1, and 2 */
double bessel_lut[BESSEL_LEN*3];

/*****************************************************************************/
/* C functions

Input validation and error handling is done in the Python wrapper functions.
*/

/*
Fast rounding of floating point to integer numbers.
*/
#ifdef WIN32

int floor_int(double x)
{
    int i;
    const double _f = -0.5f;
    __asm {
        fld x
        fadd st, st(0)
        fadd _f
        fistp i
        sar i, 1
    }
    return i;
}

int ceil_int(double x)
{
    int i;
    const double _f = -0.5f;
    __asm {
        fld x
        fadd st, st(0)
        fsubr _f
        fistp i
        sar i, 1
    }
    return (-i);
}

#else
#define floor_int(x) (int)floor((double)(x))
#define ceil_int(x) (int)ceil((double)(x))
#endif

/*
Return Bessel function at x for orders 0, 1, and 2.
Values are linearly interpolated from the lookup table.
*/
void bessel_lookup(double x, double* result)
{
    double* p;
    double alpha = x * BESSEL_RES;
    int index = floor_int(alpha);

    if (index < BESSEL_LEN) {
        p = &bessel_lut[index*3];
        alpha -= (double)index;
        result[0] = p[0] + alpha * (p[3] - p[0]);
        result[1] = p[1] + alpha * (p[4] - p[1]);
        result[2] = p[2] + alpha * (p[5] - p[2]);
    }
    else
        result[0] = result[1] = result[2] = 0.0;
}

/*
Initialize the global Bessel function lookup table.
Values are approximated by integrating Bessel's integral.
*/
int bessel_init(void)
{
    int i, j;
    double x, t, xst, dt, *ptr;

    memset(bessel_lut, 0, BESSEL_LEN*3*sizeof(double));

    dt = M_PI / (BESSEL_INT);
    ptr = bessel_lut;
    for (i = 0; i < BESSEL_LEN; i++) {
        x = (double)i / BESSEL_RES;
        for (j = 0; j < BESSEL_INT; j++) {
            t = j * dt;
            xst = x * sin(t);
            ptr[0] += cos(-xst);
            ptr[2] += cos(2.0*t - xst);
        }
        ptr[0] /= BESSEL_INT;
        ptr[2] /= BESSEL_INT;
        ptr += 3;
    }

    dt = M_PI / (BESSEL_INT-1);
    ptr = bessel_lut+1;
    for (i = 0; i < BESSEL_LEN; i++) {
        x = (double)i / BESSEL_RES;
        for (j = 0; j < BESSEL_INT; j++) {
            t = j * dt;
            *ptr += cos(t - x*sin(t));
        }
        *ptr /= BESSEL_INT-1;
        ptr += 3;
    }
    return 1;
}

/*
Calculate 2D Gaussian distribution.
*/
int gaussian2d(double *out, int* shape, double* sigma)
{
    int z, r;
    double sz, sr, t;

    if ((out == NULL) || (sigma[0] == 0) || (sigma[1] == 0))
        return 0;

    sz = -0.5 / (sigma[0]*sigma[0]);
    sr = -0.5 / (sigma[1]*sigma[1]);

    for (z = 0; z < shape[0]; z++) {
        t = z*z * sz;
        for (r = 0; r < shape[1]; r++)
            *out++ = exp(t + r*r*sr);
    }
    return 1;
}

/*
Calculate Gaussian parameters for the nonparaxial widefield case.
*/
void sigma_widefield(double* sz, double* sr, double nk, double cosa)
{
    double t = pow(cosa, 1.5);
    *sr = 1.0 / (nk * sqrt((4. - 7.*t + 3.*pow(cosa,3.5)) / (7.*(1. - t))));
    *sz = (5.*sqrt(7.)*(1. - t)) / (nk * sqrt(6.*(
        4.*pow(cosa,5) - 25.*pow(cosa,3.5) + 42.*pow(cosa,2.5) - 25.*t + 4.)));
}

/*
Calculate Gaussian parameters for point spread function approximation
according to B Zhang et al. Appl. Optics (46) 1819-29, 2007.
*/
int gaussian_sigma(double* sz, double* sr, double lex, double lem,
                   double NA, double n, double r, int widefield, int paraxial)
{
    if ((NA <= 0.0) || (n <= 0.0) || (lem <= 0.0) || ((NA/n) >= 1.0))
        return 0;

    if (widefield) {
        if (paraxial) {
            *sr = sqrt(2.) * lem / (M_2PI * NA);
            *sz = sqrt(6.) * lem / (M_2PI * NA*NA) * n * 2.;
        }
        else {
            sigma_widefield(sz, sr, n*M_2PI/lem, cos(asin(NA/n)));
        }
    }
    else {
        if ((r <= 0.0) || (lex <= 0.0))
            return 0;
        if (paraxial) {
            double kem = M_2PI / lem;
            double c1 = M_2PI / lex * r * NA;
            double c2 = M_2PI / lem * r * NA;
            double J0, J1, J[3];
            bessel_lookup(c2, J);
            J0 = J[0];
            J1 = J[1];
            *sr = sqrt(2./(c1*c1/(r*r) +
                (4.*c2*J0*J1 - 8.*J1*J1) / (r*r*(J0*J0 + J1*J1 - 1.))));
            *sz = 2.*sqrt(6./((c1*c1*NA*NA)/(r*r*n*n) -
                (48.*c2*c2*(J0*J0 + J1*J1) - 192.*J1*J1) /
                (n*n*kem*kem*r*r*r*r*(J0*J0 + J1*J1 - 1.))));
        }
        else {
            double e, sz_em, sr_em, sz_ex, sr_ex;
            double cosa = cos(asin(NA/n));
            sigma_widefield(&sz_ex, &sr_ex, n*M_2PI/lex, cosa);
            sigma_widefield(&sz_em, &sr_em, n*M_2PI/lem, cosa);
            e = sr_em*sr_em;
            e = 2.0 * e*e * (exp(r*r/(2.0*e)) - 1.0);
            *sr = sqrt((sr_ex*sr_ex * e) / (e + r*r * sr_ex*sr_ex));
            *sz = sz_ex*sz_em / sqrt(sz_ex*sz_ex + sz_em*sz_em);
        }
    }
    return 1;
}

/*
Apodization function for excitation.
*/
double apodization_excitation(double ct, double st, double _, double beta)
{
    return sqrt(ct) * exp(st*st * beta);
}

/*
Apodization function for isotropic fluorescence emission.
*/
double apodization_emission(double ct, double st, double M, double _)
{
    double t = M*st;
    return sqrt(ct / sqrt(1.0 - t*t));
}

/*
Calculate the Point Spread Function for unpolarized or circular polarized
light according to the diffraction proposed by Richards and Wolf.
See supporting information of B Huang et al. Chem Phys Chem (5) 1523-31, 2004.
*/
int psf(
    int type,        /* PSF type: 0: excitation, 1: emission */
    double *data,    /* output array[shape[0]][shape[1]] */
    int* shape,      /* shape of data array */
    double* uvdim,   /* optical units in u and v dimension */
    double M,        /* lateral magnification factor */
    double sinalpha, /* numerical aperture / refractive index of medium */
    double beta,     /* underfilling ratio (1.0) */
    double gamma,    /* ex_wavelen / em_wavelen / refractive index (1.0) */
    int intsteps     /* number of steps for integrating over theta (50) */
    )
{
    int i, j, k, u_shape, v_shape;
    double u, v, t, st, ct, re, im, re0, im0, re1, im1, re2, im2;
    double const0, const1, u_delta, v_delta, bessel[3];
    double alpha; /* integration over theta upper limit */
    double delta; /* step size for integrating over theta */
    double *cache, *cptr, *dptr;
    double (*apodization)(double, double, double, double);

    if ((intsteps < 4) || (sinalpha <= 0.0) || (sinalpha >= 1.0))
        return 0;

    switch (type) {
        case 0: /* excitation */
            apodization = apodization_excitation;
            alpha = asin(sinalpha);
            beta = -beta*beta / (sinalpha * sinalpha);
            gamma = M = 1.0;
            break;
        case 1: /* emission */
            apodization = apodization_emission;
            alpha = asin(sinalpha / M);
            beta = 1.0;
            break;
        default:
            return 0;
    }

    delta = alpha / (double)(intsteps-1);

    cache = cptr = (double *)malloc((intsteps*5)*sizeof(double));
    if (cache == NULL) return 0;

    const0 = gamma / sinalpha;
    const1 = gamma / (sinalpha * sinalpha);

    /* cache some values used in inner integration loop */
    for (k = 0; k < intsteps; k++) {
        t = k * delta;
        st = sin(t);
        ct = cos(t);
        t = st * apodization(ct, st, M, beta);
        cptr[0] = st * const0;
        cptr[1] = ct * const1;
        cptr[2] = t * (1.0 + ct);
        cptr[3] = t * st * (2.0); /* 4*I1(u,v) */
        cptr[4] = t * (1.0 - ct);
        cptr += 5;
    }

    u_shape = shape[0];
    v_shape = shape[1];
    u_delta = uvdim[0] / (double)(u_shape-1);
    v_delta = uvdim[1] / (double)(v_shape-1);
    dptr = data;

    for (i = 0; i < u_shape; i++)
    {
        u = u_delta * (double) i;
        for (j = 0; j < v_shape; j++)
        {
            v = v_delta * (double) j;
            re0 = im0 = re1 = im1 = re2 = im2 = 0.0;
            cptr = cache;
            /* integrate over theta using trapezoid rule */
            bessel_lookup(v * cptr[0], bessel);
            ct = u * cptr[1]; re = cos(ct); im = sin(ct);
            t = bessel[0]*cptr[2]*0.5; re0 += re*t; im0 += im*t;
            t = bessel[1]*cptr[3]*0.5; re1 += re*t; im1 += im*t;
            t = bessel[2]*cptr[4]*0.5; re2 += re*t; im2 += im*t;
            cptr += 5;
            for (k = 1; k < intsteps-1; k++)
            {
                bessel_lookup(v * cptr[0], bessel);
                ct = u * cptr[1];
                re = cos(ct); /* complex exponential with re=0 */
                im = sin(ct);
                t = bessel[0]*cptr[2]; re0 += re*t; im0 += im*t;
                t = bessel[1]*cptr[3]; re1 += re*t; im1 += im*t;
                t = bessel[2]*cptr[4]; re2 += re*t; im2 += im*t;
                cptr += 5;
            }
            bessel_lookup(v * cptr[0], bessel);
            ct = u * cptr[1]; re = cos(ct); im = sin(ct);
            t = bessel[0]*cptr[2]*0.5; re0 += re*t; im0 += im*t;
            t = bessel[1]*cptr[3]*0.5; re1 += re*t; im1 += im*t;
            t = bessel[2]*cptr[4]*0.5; re2 += re*t; im2 += im*t;

            *dptr++ = (re0*re0 + im0*im0 +
                       re1*re1 + im1*im1 +
                       re2*re2 + im2*im2);
        }
    }
    t = data[0];
    for (i = 0; i < u_shape*v_shape; i++)
        data[i] /= t;

    free(cache);
    return 1;
}

/*
Calculate the observation volume for unpolarized light by multiplying the
excitation PSF with the convolution of the emission PSF and detector kernel.

The PSFs and the detector kernel must have equal physical sizes per pixel.
The PSFs must be in zr space, the detector kernel in xy space.
*/
int obsvol(
    int dimz,         /* size of PSF arrays in z dimension */
    int dimr,         /* size of PSF arrays in r dimension */
    int dimd,         /* pixel size of detector array (must be square) */
    double *obsvol,   /* output array[dimu, dimr, dimr] */
    double *ex_psf,   /* excitation PSF array[dimu, dimr] */
    double *em_psf,   /* emission PSF array[dimu, dimr] */
    double *detector) /* detector kernel array[dimd, dimd] or NULL */
{
    int z, x, y, r, xx, i, ii, ri, index, indey, *_ri;
    double sum, rf, x2, t;
    double *exptr, *emptr, *ovptr, *_rf, *_em;
    int _dimd = dimd;

    if (detector == NULL) {
        /* approximation for large pinholes == widefield */
        i = 0;
        for (z = 0; z < dimz; z++) {
            sum = em_psf[i] * M_PI * 0.25;
            ii = i;
            for (r = 1; r < dimr; r++)
                sum += em_psf[++ii] * (double)r;
            sum *= M_2PI;
            for (r = 0; r < dimr; r++) {
                obsvol[i] = ex_psf[i] * sum;
                i++;
            }
        }
    }
    else if (dimd < 2) {
        /* approximation for very small pinholes */
        for (i = 0; i < dimz*dimr; i++) {
            obsvol[i] = ex_psf[i] * em_psf[i];
        }
    }
    else {
        /* use detector/pinhole kernel */
        if (dimd > dimr) dimd = dimr;
        /* floor integer and remainder float of radius at xy */
        _ri = (int *)malloc((dimr*dimd)*sizeof(int));
        if (_ri == NULL) return 0;
        _rf = (double *)malloc((dimr*dimd)*sizeof(double));
        if (_rf == NULL) {free(_ri); return 0;}
        /* em_psf at xy */
        _em = (double *)malloc((dimr*dimd)*sizeof(double));
        if (_em == NULL) {free(_ri); free(_rf); return 0;}

        for (x = 0; x < dimd; x++) {
            x2 = x*x;
            indey = x;
            index = x*dimd;
            _ri[index] = _ri[indey] = x;
            _rf[index] = _rf[indey] = 0.0;
            for (y = 1; y <= x; y++) {
                index++;
                indey += dimd;
                rf = sqrt(x2 + y*y);
                ri = floor_int(rf);
                _ri[index] = _ri[indey] = (dimr > ri) ? ri : -1;
                _rf[index] = _rf[indey] = (dimr > ri+1) ? rf-(double)ri : 0.0;
            }
        }
        for (x = dimd; x < dimr; x++) {
            index = x*dimd;
            _ri[index] = x;
            _rf[index] = 0.0;
            x2 = x*x;
            for (y = 1; y < dimd; y++) {
                index++;
                rf = sqrt(x2 + y*y);
                ri = floor_int(rf);
                _ri[index] = (dimr > ri) ? ri : -1;
                _rf[index] = (dimr > ri+1) ? rf-(double)ri : 0.0;
            }
        }
        for (z = 0; z < dimz; z++) {
            exptr = &ex_psf[z*dimr];
            emptr = &em_psf[z*dimr];
            ovptr = &obsvol[z*dimr];
            /* emission psf in xy space */
            for (x = 0; x < dimd; x++) {
                indey = x;
                index = x*dimd;
                _em[index] = _em[indey] = emptr[x];
                for (y = 1; y <= x; y++) {
                    index++;
                    indey += dimd;
                    ri = _ri[index];
                    if (ri >= 0) {
                        rf = _rf[index];
                        _em[index] = _em[indey] =
                            rf ? emptr[ri]+rf*(emptr[ri+1]-emptr[ri])
                               : emptr[ri];
                    }
                    else
                        _em[index] = _em[indey] = 0.0;
                }
            }
            for (x = dimd; x < dimr; x++) {
                index = x*dimd;
                _em[index] = emptr[x];
                for (y = 1; y < dimd; y++) {
                    index++;
                    ri = _ri[index];
                    if (ri >= 0) {
                        rf = _rf[index];
                        _em[index] =
                            rf ? emptr[ri]+rf*(emptr[ri+1]-emptr[ri])
                               : emptr[ri];
                    }
                    else
                        _em[index] = 0.0;
                }
            }
            for (r = 0; r < dimr; r++) {
                /* Convolute emission PSF with detector kernel.
                For large kernels this is inefficient and should be
                replaced by FFT. */
                sum = 0.0;
                i = 1-dimd + (_dimd-dimd);
                for (x = r-dimd+1; x < MIN(r+dimd, dimr); x++) {
                    xx = abs(x) * dimd;
                    ii = abs(i++) * _dimd;
                    for (y = 0; y < dimd; y++)
                        sum += _em[xx++] * detector[ii++];
                }
                /* multiply integral with excitation psf */
                ovptr[r] = sum * exptr[r];
            }
        }
        free(_ri);
        free(_rf);
      &nb