Source code for h5features.h5features

# Copyright 2014-2016 Thomas Schatz, Mathieu Bernard, Roland Thiolliere
#
# This file is part of h5features.
#
# h5features is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# h5features is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with h5features.  If not, see <http://www.gnu.org/licenses/>.

"""Provides the read() and write() wrapper functions.

.. note::

   For compatibility with h5features 1.0, this legacy top-level API
   has been kept in this module. Except for use in legacy code, it is
   **better not to use it**. Use the `h5features.writer` and
   `h5features.reader` modules instead.

"""

from .data import Data
from .reader import Reader
from .writer import Writer


def read(filename, groupname=None, from_item=None, to_item=None,
         from_time=None, to_time=None, index=None):
    """Read data from a h5features file.

    This function is a thin wrapper around the `Reader` class.

    :param str filename: Path to a hdf5 file potentially serving as a
        container for many small files.

    :param str groupname: HDF5 group to read the data from. If None,
        guess there is one and only one group in `filename`.

    :param str from_item: Optional. Read the data starting from this
        item. (defaults to the first stored item)

    :param str to_item: Optional. Read the data until reaching this
        item. (defaults to from_item if it was specified and to the
        last stored item otherwise)

    :param float from_time: Optional. (defaults to the beginning time
        in from_item) The specified times are included in the output.

    :param float to_time: Optional. (defaults to the ending time in
        to_item) The specified times are included in the output.

    :param int index: Optional. Legacy read from an index is not
        implemented; this must be left to None.

    :return: A tuple (times, features) such as:

        * times: A dictionary of 1D arrays values (keys are items).

        * features: A dictionary of 2D arrays values (keys are items)
          with the 'feature' dimension along the columns and the
          'time' dimension along the lines.

    :raise NotImplementedError: if `index` is not None.

    .. note:: Note that all the files that are present on disk between
        from_item and to_item will be loaded and returned. It's the
        responsibility of the user to make sure that it will fit into
        RAM memory.

    """
    # Legacy read from index is not implemented: fail early, before
    # the file is opened by the reader.
    if index is not None:
        raise NotImplementedError('reading from an index is not implemented')

    data = Reader(filename, groupname).read(
        from_item, to_item, from_time, to_time)
    return data.dict_labels(), data.dict_features()
def write(filename, groupname, items, times, features,
          dformat='dense', chunk_size=0.1, sparsity=0.1, mode='a'):
    """Write h5features data in a HDF5 file.

    This function is a wrapper to the Writer class. It has three purposes:

    * Check parameters for errors (see details below),
    * Create Items, Times and Features objects
    * Send them to the Writer.

    :param str filename: HDF5 file to be written, potentially serving
        as a container for many small files. If the file does not
        exist, it is created. If the file is already a valid HDF5
        file, try to append the data in it.

    :param str groupname: Name of the group to write the data in, or
        to append the data to if the group already exists in the file.

    :param items: List of files from which the features were
        extracted. Items must not contain duplicates.
    :type items: list of str

    :param times: Time value for the features array. Elements of a 1D
        array are considered as the center of the time window
        associated with the features. A 2D array must have 2 columns
        corresponding to the begin and end timestamps of the features
        time window.
    :type times: list of 1D or 2D numpy arrays

    :param features: Features should have time along the lines and
        features along the columns (accommodating row-major storage in
        hdf5 files).
    :type features: list of 2D numpy arrays

    :param str dformat: Optional. Which format to store the features
        into (sparse or dense). Default is dense.

    :param float chunk_size: Optional. In megabytes (Mo), tuning
        parameter corresponding to the size of a chunk in the
        h5file. Ignored if the file already exists.

    :param float sparsity: Optional. Tuning parameter corresponding to
        the expected proportion (in [0, 1]) of non-zeros elements on
        average in a single frame. Only used when dformat is 'sparse'.

    :param char mode: Optional. The mode for overwriting an existing
        file, 'a' to append data to the file, 'w' to overwrite it.

    :raise IOError: if the filename is not valid or parameters are
        inconsistent.

    :raise NotImplementedError: if dformat == 'sparse'

    """
    # Prepare the data, raise on error. The sparsity is only forwarded
    # for the sparse format.
    sparsity = sparsity if dformat == 'sparse' else None
    data = Data(items, times, features, sparsity=sparsity, check=True)

    # Write all that stuff in the HDF5 file's specified group. `mode`
    # is forwarded so that the documented 'w' (overwrite) behaviour is
    # honoured instead of being silently ignored.
    # NOTE(review): assumes Writer.__init__ accepts a `mode` keyword --
    # confirm against h5features.writer.
    writer = Writer(filename, chunk_size=chunk_size, mode=mode)
    writer.write(data, groupname, append=True)
def simple_write(filename, group, times, features, item='item', mode='a'):
    """Simplified version of `write()` when there is only one item.

    The single `item`, `times` and `features` are each wrapped in a
    one-element list and forwarded to `write()`, along with `mode`.

    :param str filename: Forwarded to `write()` as `filename`.
    :param str group: Forwarded to `write()` as `groupname`.
    :param times: Times of the single item.
    :param features: Features of the single item.
    :param str item: Optional. Name of the single item, default is 'item'.
    :param char mode: Optional. Forwarded to `write()` as `mode`.

    """
    write(
        filename,
        groupname=group,
        items=[item],
        times=[times],
        features=[features],
        mode=mode,
    )