Source code for h5features.index

# Copyright 2014-2016 Thomas Schatz, Mathieu Bernard, Roland Thiolliere
#
# This file is part of h5features.
#
# h5features is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# h5features is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with h5features.  If not, see <http://www.gnu.org/licenses/>.
"""Provides indexing facilities to the h5features package.

This index typically allows a faster read access in large datasets and
is transparent to the user.

Because the h5features package is designed to handle large datasets,
features and times data is internally stored in a compact *indexed*
representation.

"""

import numpy as np
from .entry import nb_per_chunk


def cumindex(features):
    """Return the index computed from features.

    The index is the cumulative sum of the first-axis length
    (``shape[0]``) of each element in ``features.data``.

    :param features: An object exposing a ``data`` iterable whose
        elements have a ``shape`` attribute.

    :return: A 1D numpy array of ``int64`` cumulated lengths, one
        entry per element of ``features.data``. The array is empty
        (but still ``int64``) when there is no data.

    """
    lengths = [x.shape[0] for x in features.data]
    # Force an integer dtype: without it np.cumsum falls back to
    # float64 on an empty list, which does not match the int64 index.
    return np.cumsum(lengths, dtype=np.int64)
def create_index(group, chunk_size):
    """Create an empty, resizable 'index' dataset in the given group.

    The dataset holds ``int64`` values, starts with zero elements and
    has no upper bound on its length.

    :param group: The group in which the 'index' dataset is created
        (must provide a ``create_dataset`` method).

    :param chunk_size: Forwarded to `nb_per_chunk`, together with the
        ``int64`` item size and a dimension of 1, to obtain the chunk
        length of the dataset.

    """
    item_size = np.dtype(np.int64).itemsize
    chunk_length = nb_per_chunk(item_size, 1, chunk_size)

    group.create_dataset(
        'index',
        (0,),
        dtype=np.int64,
        chunks=(chunk_length,),
        maxshape=(None,))
def write_index(data, group, append):
    """Write the data index to the given group.

    The index is the cumulated index of the features in `data`,
    shifted by the last value already stored in ``group['index']``
    when the group contains items, or by -1 otherwise.

    :param h5features.Data data: The data that is being indexed.

    :param h5py.Group group: The group where to write the index.

    :param bool append: If True, append the created index to the
        existing one in the `group`. If False, the existing index is
        resized to the new index and overwritten by it.

    """
    # The offset is only read from the stored index when the group
    # already holds at least one item.
    has_items = 'items' in group and group['items'].shape[0] > 0
    offset = group['index'][-1] if has_items else -1
    index = offset + cumindex(data._entries['features'])

    dataset = group['index']
    if not append:
        # Overwrite: fit the dataset to the new index.
        dataset.resize((index.shape[0],))
        dataset[...] = index
        return

    # Append: grow the dataset and fill the new tail.
    start = dataset.shape[0]
    dataset.resize((start + index.shape[0],))
    dataset[start:] = index
def read_index(group, version='1.1'):
    """Return the index stored in a h5features group.

    The index is read from the 'file_index' dataset for version
    '1.0' and from the 'index' dataset for any other version. For
    version '0.1' the values are additionally converted to ``int64``.

    :param h5py.Group group: The group to read the index from.

    :param str version: The h5features version of the `group`.

    :return: a 1D numpy array of features indices.

    """
    dataset_name = 'file_index' if version == '1.0' else 'index'
    index = group[dataset_name][...]

    if version == '0.1':
        index = np.int64(index)
    return index