# Source code for h5features.index

# Copyright 2014-2016 Thomas Schatz, Mathieu Bernard, Roland Thiolliere
#
# This file is part of h5features.
#
# h5features is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# h5features is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with h5features.  If not, see <http://www.gnu.org/licenses/>.
"""Provides indexing facilities to the h5features package.

This index typically allows a faster read access in large datasets and
is transparent to the user.

Because the h5features package is designed to handle large datasets,
features and times data is internally stored in a compact *indexed*
representation.

"""

import numpy as np
from .entry import nb_per_chunk


def cumindex(features):
    """Return the index computed from features.

    The index is the cumulative sum of the first-dimension length of
    each element in `features.data`.

    :param features: An object exposing a `data` iterable whose
        elements have a `shape` attribute (e.g. numpy arrays).
    :return: a 1D numpy array of int64 with one entry per element of
        `features.data`.

    """
    lengths = [x.shape[0] for x in features.data]
    # Force int64: np.cumsum on an empty list would otherwise yield a
    # float64 array, and the platform default integer may be 32 bits.
    return np.cumsum(lengths, dtype=np.int64)


def create_index(group, chunk_size):
    """Create an empty index dataset in the given group.

    The dataset is named 'index', stores int64 values, starts with
    shape (0,) and is resizable without bound along its single axis.

    :param group: The group where to create the dataset (presumably
        a h5py.Group, as in `write_index`).
    :param chunk_size: Forwarded to `nb_per_chunk` to compute the
        number of entries per chunk. NOTE(review): units are defined
        by `nb_per_chunk` in the entry module -- confirm there.

    """
    dtype = np.int64
    # a chunk shape must be a tuple, even for a 1D dataset
    chunks = (nb_per_chunk(np.dtype(dtype).itemsize, 1, chunk_size),)
    group.create_dataset('index', (0,), dtype=dtype,
                         chunks=chunks, maxshape=(None,))


def write_index(data, group, append):
    """Write the data index to the given group.

    The index is built from the cumulative sizes of the features in
    `data` (see `cumindex`), shifted by an offset. The offset is the
    last stored index value when appending to a group that already
    has items, and -1 otherwise.

    :param h5features.Data data: The data that is being indexed.
    :param h5py.Group group: The group where to write the index. It
        must already contain an 'index' dataset.
    :param bool append: If True, append the created index to the
        existing one in the `group`. Delete any existing data in index
        if False.

    """
    if append:
        # continue numbering after the last stored index value
        nitems = group['items'].shape[0] if 'items' in group else 0
        last_index = group['index'][-1] if nitems > 0 else -1
    else:
        # the existing index is discarded, so it must not offset the
        # new one
        last_index = -1

    # build the index from data
    index = last_index + cumindex(data._entries['features'])

    if append:
        nidx = group['index'].shape[0]
        # NOTE: appending to the end of an existing item (continuing
        # the last item) is not handled here; an earlier attempt that
        # decremented nidx in that case is disabled.
        group['index'].resize((nidx + index.shape[0],))
        group['index'][nidx:] = index
    else:
        group['index'].resize((index.shape[0],))
        group['index'][...] = index


def read_index(group, version='1.1'):
    """Return the index stored in a h5features group.

    The dataset holding the index depends on the file version:
    'index' converted to int64 for version '0.1', 'file_index' for
    version '1.0', and 'index' read as stored for any other version.

    :param h5py.Group group: The group to read the index from.
    :param str version: The h5features version of the `group`.
    :return: a 1D numpy array of features indices.
    """
    if version == '0.1':
        # legacy files: the stored values are converted to int64
        return group['index'][...].astype(np.int64)
    if version == '1.0':
        return group['file_index'][...]
    return group['index'][...]