Source code for h5features.items

# Copyright 2014-2016 Thomas Schatz, Mathieu Bernard, Roland Thiolliere
#
# This file is part of h5features.
#
# h5features is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# h5features is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with h5features.  If not, see <http://www.gnu.org/licenses/>.

"""Provides the Items class to the h5features package."""

import numpy as np

from h5py import special_dtype
from .entry import Entry, nb_per_chunk


def read_items(group, version='1.1', check=False):
    """Return the items stored in a h5features group.

    :param group: The HDF5 group to read from. The names are read from
        its 'files' dataset for versions '0.1' and '1.0', and from its
        'items' dataset for any other version.

    :param str version: The h5features format version of the group.

    :param bool check: Forwarded to the Items constructor. It is not
        used for version '0.1'.

    :return: An Items instance, except for version '0.1' where a plain
        list of item names is returned.

    """
    if version == '0.1':
        # `unichr` only exists on Python 2. On Python 3 the builtin
        # `chr` already covers the whole Unicode range, so fall back
        # to it instead of failing with a NameError.
        try:
            to_char = unichr
        except NameError:
            to_char = chr

        # The dataset holds one character code per element. Once the
        # text is rebuilt, '/-' is turned back into '/' and the result
        # is split on the two-character sequence slash-backslash.
        text = ''.join(to_char(int(code)) for code in group['files'][...])
        return text.replace('/-', '/').split('/\\')

    dataset = 'files' if version == '1.0' else 'items'
    return Items(list(group[dataset][...]), check)
class Items(Entry):
    """This class manages items in h5features files.

    :param data: A list of item names (e.g. files from which the
        features were extracted). Each name of the list must be unique.
    :type data: list of str

    :param bool check: When True (default), `data` is validated on
        construction. The flag is also forwarded to the Entry
        constructor.

    :raise IOError: if `check` is True and data is empty or if one or
        more names are not unique in the list.

    """
    def __init__(self, data, check=True):
        if check:
            if not data:
                raise IOError('data is empty')
            if len(set(data)) != len(data):
                raise IOError('all items must have different names.')

        # NOTE(review): self.name, self.data and self.dtype used by the
        # methods below are presumably set by Entry.__init__ from these
        # arguments -- confirm against the Entry class.
        super(Items, self).__init__(
            'items', data, 1, special_dtype(vlen=str), check)

    @staticmethod
    def _as_text(name):
        """Return `name` decoded from UTF-8 if it is a bytes object.

        Any other object is returned unchanged.

        """
        return name.decode('utf-8') if isinstance(name, bytes) else name

    def create_dataset(self, group, chunk_size):
        """Create an empty items dataset in the given HDF5 group."""
        self._create_dataset(group, chunk_size)

    def is_appendable_to(self, group):
        """Return True if no stored item is already present in `group`.

        The names read from the group's dataset and the stored ones are
        both normalized to text before being compared. Recent h5py
        versions read variable-length strings back as bytes, which
        never compare equal to str names, so without the normalization
        duplicated names would go undetected.

        """
        existing = set(
            self._as_text(name) for name in group[self.name][...])
        return existing.isdisjoint(
            self._as_text(name) for name in self.data)

    def write_to(self, group):
        """Write stored items to the given HDF5 group.

        We assume that self.create_dataset() has been called.

        """
        # The HDF5 dataset where to write data
        items_group = group[self.name]

        # Grow the dataset by the number of stored items, then fill the
        # newly created tail with them.
        nitems = items_group.shape[0]
        items_group.resize((nitems + len(self.data),))
        items_group[nitems:] = self.data

    def is_valid_interval(self, lower, upper):
        """Return False if [lower:upper] is not a valid subitems interval.

        If it is, then returns a tuple of (lower index, upper index).
        The interval is not valid when one of the bounds is not a
        stored item, or when `lower` comes after `upper`.

        """
        try:
            lower_idx = self.data.index(lower)
            upper_idx = self.data.index(upper)
        except ValueError:
            # list.index raises when a bound is not a stored item
            return False

        return (lower_idx, upper_idx) if lower_idx <= upper_idx else False

    def _create_dataset(self, group, chunk_size):
        """Create an empty, resizable dataset in a group.

        The dataset is named after self.name, starts with no element
        and has no upper bound on its length.

        """
        # if dtype is a variable str, guess representative size is 20 bytes
        if self.dtype == np.dtype('O'):
            per_chunk = nb_per_chunk(20, 1, chunk_size)
        else:
            per_chunk = nb_per_chunk(
                np.dtype(self.dtype).itemsize, 1, chunk_size)

        # h5py raises if the chunk size is >= 4 Gb
        group.create_dataset(
            self.name, (0,), dtype=self.dtype,
            chunks=(per_chunk,), maxshape=(None,))