# Source code for h5features.entry

# Copyright 2014-2016 Thomas Schatz, Mathieu Bernard, Roland Thiolliere
#
# This file is part of h5features.
#
# h5features is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# h5features is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with h5features.  If not, see <http://www.gnu.org/licenses/>.
"""Provides the Entry class to the h5features package."""

import numpy as np


def nb_per_chunk(item_size, item_dim, chunk_size):
    """Compute how many items fit in a single chunk.

    The result is never smaller than 10, whatever the requested
    chunk size.

    :param int item_size: Size in bytes of one scalar component of an
        item (e.g. 8 for np.float64).

    :param int item_dim: Dimension of the items (length of the second
        axis).

    :param float chunk_size: Size of a chunk, expressed in MBytes.

    :return: The number of items per chunk, at least 10.

    """
    # Convert the chunk size from MBytes to bytes.
    chunk_bytes = chunk_size * 10.**6

    # Number of whole items fitting in the chunk, rounded to the
    # nearest integer.
    items = int(round(chunk_bytes / (item_size*item_dim)))

    # Enforce a floor of 10 items per chunk.
    return items if items > 10 else 10
class Entry(object):
    """Base class for the entries making up a h5features ``Data``.

    It defines the interface shared by the ``Items``, ``Times`` and
    ``Features`` classes, which together compose a ``Data``.

    :param name: Name of the entry. It is also the name given to the
        dataset created by ``_create_dataset``.

    :param list data: The data stored in the entry.

    :param int dim: Dimension of the entry, used as the length of the
        second axis of the created dataset.

    :param dtype: Type of the stored data, forwarded as the dataset
        dtype.

    :param bool check: When True (default), validate `data` and `dim`
        before storing them.

    :raise ValueError: If `check` is True and `data` is not a list
        or `dim` is lower than 1.

    """
    def __init__(self, name, data, dim, dtype, check=True):
        # Validation is optional: nothing is inspected when check is
        # False.
        if check and not isinstance(data, list):
            raise ValueError('data must be a list')
        if check and dim < 1:
            raise ValueError('dimension must be strictly positive')

        self.name = name
        self.data = data
        self.dim = dim
        self.dtype = dtype

    def __eq__(self, other):
        """Return True if `other` has the same description and data."""
        if other is self:
            return True

        try:
            if not self.is_appendable(other):
                return False
            return self.data == other.data
        except AttributeError:
            # `other` has no `data` attribute: it cannot be equal.
            return False

    def __ne__(self, other):
        """Return the negation of ``__eq__``."""
        equal = self.__eq__(other)
        return not equal

    def clear(self):
        """Drop the stored data, leaving an empty list."""
        self.data = list()

    def is_appendable(self, entry):
        """Tell whether `entry` can be appended to this entry.

        This is True only when both entries share the same name,
        dtype and dim. An `entry` missing one of those attributes is
        not appendable.

        """
        try:
            # Attributes are compared in order, stopping at the first
            # mismatch.
            return all(
                getattr(self, attr) == getattr(entry, attr)
                for attr in ('name', 'dtype', 'dim'))
        except AttributeError:
            return False

    def append(self, entry):
        """Extend the stored data, in place, with the data of `entry`.

        :raise ValueError: If `entry` is not appendable to self (see
            ``is_appendable``).

        """
        if self.is_appendable(entry):
            self.data += entry.data
        else:
            raise ValueError('entry not appendable')

    def _create_dataset(self, group, chunk_size):
        """Create an empty dataset named after this entry in `group`.

        :param group: Object exposing a ``create_dataset`` method
            (presumably an h5py group -- confirm against callers).

        :param float chunk_size: Size of a chunk in MBytes, used to
            derive the number of rows per chunk.

        """
        # The size of a variable length string is unknown: 20 bytes is
        # taken as a representative guess.
        if self.dtype == np.dtype('O'):
            item_size = 20
        else:
            item_size = np.dtype(self.dtype).itemsize
        rows = nb_per_chunk(item_size, self.dim, chunk_size)

        # NOTE: per the original author's comment, h5py is expected to
        # raise for chunks of 4 Gb or more; no check is done here.
        group.create_dataset(
            self.name, (0, self.dim), dtype=self.dtype,
            chunks=(rows, self.dim), maxshape=(None, self.dim))