# -*- coding: utf-8 -*-
# qdafile.py

# Copyright (c) 2007-2014, Christoph Gohlke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holders nor the names of any
#   contributors may be used to endorse or promote products derived
#   from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Read and write KaleidaGraph(tm) QDA data files.

KaleidaGraph is a registered trademark of `Abelbeck Software
<http://www.synergy.com>`_.

:Author: `Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_

:Version: 2013.01.18

Requirements
------------
* `CPython 2.7 or 3.3 <http://www.python.org>`_
* `Numpy 1.7 <http://www.numpy.org>`_

Examples
--------
>>> from qdafile import QDAfile
>>> QDAfile().write('_empty.qda')
>>> QDAfile(
...     [[1.0, 2.0, 0.], [3.0, 4.0, 5.0], [6.0, 7.0, 0.]],
...     rows=[2, 3, '2'],
...     headers=['X', 'Y', 'Z'],
...     dtypes=['>f8', '>i4', '>f4'],
...     ).write('_test.qda')
>>> qda = QDAfile("_test.qda")
>>> print(qda.headers[2], qda[2, :qda.rows[2]])
Z [ 6.  7.]

"""

from __future__ import division, print_function

import sys
import struct

import numpy

# Release identifier; same value as the ":Version:" field of the module
# docstring.
__version__ = '2013.01.18'
# Markup language used by the docstrings in this module.
__docformat__ = 'restructuredtext en'
# Names exported by a star-import of this module.
__all__ = ['QDAfile']


class QDAfile(object):
    """Read or write KaleidaGraph(tm) version 3.x QDA data files.

    Only numeric data types (float, double, and int) are supported.
    All data are converted to double on import. The byte order of the
    binary files is big endian (Motorola).

    Files can be given as file names, path-like objects, or open binary
    file objects. Any object providing ``read``/``write`` and ``seek``
    (for example ``io.BytesIO``) is accepted.

    Raises IOError or ValueError on failure.

    Attributes
    ----------
    fid :
        file format identifier (6, 8, or 12); files are always written as 12
    name :
        file name
    data :
        2D numpy array of shape (columns, max(rows)); columns read from
        file that are shorter than the longest one are padded with NaN
    columns :
        number of columns
    headers :
        sequence of column headers (native strings)
    rows :
        sequence of number of rows in column
    dtypes :
        sequence of column data types ('>f4', '>f8', or '>i4')

    """
    # Two byte file signature -> file format identifier.
    _fileid = {b'\x00\x06': 6, b'\x00\x08': 8, b'\x00\x0C': 12}
    # Two-way map between on-disk type codes and numpy dtype strings.
    _dtypes = {0: '>f4', 3: '>f8', 4: '>i4', '>f4': 0, '>f8': 3, '>i4': 4}

    def __init__(self, arg=None, **kwargs):
        """Initialize instance using file name/descriptor or data array.

        If arg is None, an empty instance is created. If arg is a string
        or path-like object, the named file is read. If arg has a 'seek'
        attribute, it is read as an open binary file. Otherwise arg is
        converted to an array and keyword arguments can be used to
        initialize name, headers, rows, and dtypes attributes.

        Raises IOError or ValueError on failure.

        """
        self.fid = 12
        self.name = "Untitled"
        self.data = None
        self.columns = None
        self.rows = None
        self.headers = None
        self.dtypes = None

        # 'basestring' only exists (and is only evaluated) on Python 2.
        string_types = basestring if sys.version_info[0] == 2 else str

        if arg is None:
            self._fromdata([], **kwargs)
        elif isinstance(arg, string_types) or hasattr(arg, '__fspath__'):
            with open(arg, 'rb') as fh:
                self._fromfile(fh)
        elif hasattr(arg, 'seek'):
            self._fromfile(arg)
        else:
            self._fromdata(arg, **kwargs)

    def write(self, arg=None):
        """Save data to QDA file.

        arg is a file name or an open binary file object. If omitted,
        the instance's name attribute is used as file name.

        """
        if arg is None:
            arg = self.name
        if hasattr(arg, 'seek'):
            self._tofile(arg)
        else:
            with open(arg, 'wb') as fh:
                self._tofile(fh)

    @staticmethod
    def _given(seq):
        """Return True if optional sequence argument is set and not empty."""
        if seq is None:
            return False
        try:
            return len(seq) > 0
        except TypeError:
            # unsized objects fall back to plain truthiness
            return bool(seq)

    @staticmethod
    def _read_array(fh, dtype, count):
        """Return count items of dtype read from open binary file.

        Works with any object providing a read() method.

        Raises IOError if the file ends before all items are read.

        """
        dtype = numpy.dtype(dtype)
        if count == 0:
            return numpy.empty(0, dtype=dtype)
        size = count * dtype.itemsize
        buf = fh.read(size)
        if len(buf) != size:
            raise IOError("unexpected end of file")
        return numpy.frombuffer(buf, dtype=dtype, count=count)

    @staticmethod
    def _decode_header(raw):
        """Return native string from null padded header bytes."""
        raw = raw.split(b'\x00', 1)[0]
        if str is bytes:
            # Python 2: native strings are byte strings already
            return raw
        # latin-1 maps every byte to one character, so decoding never
        # fails and headers survive a read/write round trip unchanged
        return raw.decode('latin-1')

    @staticmethod
    def _encode_header(header):
        """Return header as byte string of at most 40 bytes.

        Raises ValueError (UnicodeEncodeError) if the header contains
        characters that can not be stored as single bytes.

        """
        if not isinstance(header, bytes):
            header = header.encode('latin-1')
        return header[:40]

    def _fromfile(self, fh):
        """Initialize instance from open file object.

        Raises IOError if file can not be read.

        """
        try:
            fid = self._fileid[fh.read(2)]
        except KeyError:
            raise IOError("not a QDA file or unsupported version")

        columns = int(self._read_array(fh, '>i2', 1)[0])
        if not 0 <= columns <= 1000:
            raise IOError("not a QDA file")

        # skip the remainder of the 512 byte file header
        fh.read(512 - 4)

        # row counts are 32 bit in format 12 and 16 bit in older formats
        rows = self._read_array(
            fh, '>i4' if fid == 12 else '>i2', columns).tolist()
        if any(row < 0 for row in rows):
            raise IOError("not a QDA file")

        try:
            dtypes = [self._dtypes[code] for code in
                      self._read_array(fh, '>i2', columns).tolist()]
        except KeyError:
            raise IOError("the file contains data of unsupported type")

        headers = [self._decode_header(raw) for raw in
                   self._read_array(fh, 'S40', columns).tolist()]

        data = numpy.empty((columns, max(rows) if rows else 0),
                           dtype=numpy.float64)
        data[:] = numpy.nan
        for i, (row, dtype) in enumerate(zip(rows, dtypes)):
            data[i, 0:row] = self._read_array(fh, dtype, row)
            # skip the per-column trailer: 2 bytes per row + 136 bytes
            fh.read(136 + 2 * row)

        self.fid = fid
        # in-memory streams have no name attribute
        self.name = getattr(fh, 'name', "Untitled")
        self.data = data
        self.dtypes = dtypes
        self.columns = columns
        self.rows = rows
        self.headers = headers

    def _fromdata(self, data, name="Untitled.qda", headers=None,
                  rows=None, dtypes=None):
        """Initialize instance from data array and optional arguments.

        data is converted to a 2D array of big endian doubles in which
        each row holds one column of the QDA file. rows, headers, and
        dtypes, if given, must provide one item per column. Headers are
        truncated to 40 characters.

        Raises ValueError if data is incompatible with file format.

        """
        data = numpy.array(data, dtype='>f8')
        data = numpy.atleast_2d(data)
        if data.ndim > 2:
            raise ValueError("data array must be 2 dimensional or less")

        columns, length = data.shape
        if columns > 1000:
            raise ValueError("dimensions of data array are too large")

        if self._given(rows):
            try:
                rows = [int(rows[i]) for i in range(columns)]
            except (IndexError, KeyError, TypeError, ValueError):
                raise ValueError("invalid rows argument")
            # a row count outside the data would produce a corrupt file
            if any(row < 0 or row > length for row in rows):
                raise ValueError("invalid rows argument")
        else:
            rows = [length] * columns

        if rows and max(rows) > 32768:
            raise ValueError("data array dimensions are too large")

        if self._given(headers):
            try:
                headers = [headers[i][0:40] for i in range(columns)]
            except (IndexError, KeyError, TypeError):
                raise ValueError("invalid headers argument")
        else:
            headers = unique_headers(columns)

        if self._given(dtypes):
            try:
                # store plain strings so that the type code lookup done
                # when writing also works for numpy.dtype instances
                normalized = [str(dtypes[i]) for i in range(columns)]
                for dtype in normalized:
                    self._dtypes[dtype]
            except (IndexError, KeyError, TypeError):
                raise ValueError("invalid dtypes argument")
            if len(dtypes) != columns:
                raise ValueError("invalid argument(s)")
            dtypes = normalized
        else:
            dtypes = ['>f8'] * columns

        if len(headers) != columns or len(rows) != columns:
            raise ValueError("invalid argument(s)")

        self.fid = 12
        self.name = name
        self.data = data
        self.columns = columns
        self.rows = rows
        self.headers = headers
        self.dtypes = dtypes

    def _tofile(self, fh):
        """Write data to an open file.

        The file is always written in format 12 (32 bit row counts).

        """
        # encode first so that invalid headers fail before anything is
        # written to the file
        headers = [self._encode_header(h) for h in self.headers]

        # 512 byte file header
        fh.write(b'\x00\x0C')
        fh.write(struct.pack('>h', self.columns))
        fh.write(b'\x00\x0E\x01\x02\x00\x05\x00\x01')
        fh.write(b'\x00' * (512 - 12))

        # column tables: row counts, type codes, 40 byte headers
        for r in self.rows:
            fh.write(struct.pack('>i', int(r)))
        for t in self.dtypes:
            fh.write(struct.pack('>h', self._dtypes[str(t)]))
        for h in headers:
            fh.write(h.ljust(40, b'\x00'))

        # column data, each followed by a (2 * rows + 136) byte trailer
        for i, (r, t, h) in enumerate(zip(self.rows, self.dtypes, headers)):
            r = int(r)
            fh.write(self.data[i, 0:r].astype(str(t)).tobytes())
            fh.write(b'\x00\x01' * r)
            fh.write(b'\x0E\x02\x01\x00\x05\x00\x00\x01')
            fh.write(h.ljust(128, b'\x00'))

    def __str__(self):
        """Return summary of instance attributes as multi-line string."""
        return "\n".join("%14s: %s" % t for t in (
            ("File Name", self.name),
            ("File ID", self.fid),
            ("Columns", self.columns),
            ("Rows", self.rows),
            ("Headers", self.headers),
            ("Data Types", self.dtypes), ))

    def __len__(self):
        """Return length of data array, i.e. the number of columns."""
        return len(self.data)

    def __getitem__(self, key):
        """Return item(s) of data array."""
        return self.data[key]


def unique_headers(number):
    """Return list of unique column headers.

    Headers follow the spreadsheet naming scheme: 'A' to 'Z', then 'AA'
    to 'ZZ', then 'AAA' to 'ZZZ', and so on. There is no upper limit on
    the number of headers.

    Parameters
    ----------
    number : int
        Number of headers to return.

    Raises ValueError if number is negative or not a whole number.

    Examples
    --------
    >>> unique_headers(3)
    ['A', 'B', 'C']
    >>> unique_headers(28)[-3:]
    ['Z', 'AA', 'AB']

    """
    try:
        count = int(number)
    except (TypeError, ValueError, OverflowError):
        raise ValueError("number must be a non-negative integer")
    if count != number or count < 0:
        raise ValueError("number must be a non-negative integer")

    chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    headers = []
    for index in range(1, count + 1):
        # bijective base-26: there is no zero digit, hence the "- 1"
        label = ""
        while index:
            index, digit = divmod(index - 1, len(chars))
            label = chars[digit] + label
        headers.append(label)
    return headers


if __name__ == "__main__":
    # Executed as a script: run the examples embedded in the docstrings.
    from doctest import testmod
    testmod()