# Source code for h5features.labels

# Copyright 2014-2016 Thomas Schatz, Mathieu Bernard, Roland Thiolliere
#
# This file is part of h5features.
#
# h5features is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# h5features is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with h5features.  If not, see <http://www.gnu.org/licenses/>.

"""Provides the Labels class to the h5features module."""

import numpy as np
from .entry import Entry


class Labels(Entry):
    """This class manages labels related operations for h5features files

    :param labels: Each element of the list contains the labels of
        an h5features item. Empty lists are not accepted. For all t
        in labels, we must have t.ndim to be either 1 or 2.

        * 1D arrays contain one label per frame of the related item.

        * 2D arrays contain one row of labels per frame of the
          related item. All the arrays must share the same
          t.shape[1].

    :type labels: list of numpy arrays

    :param bool check: If True, validate `labels` with
        :meth:`check` and raise on errors. The flag is also
        forwarded to the parent `Entry` constructor.

    :raise IOError: if `check` is True and the labels are not a
       non-empty list of sorted numpy arrays of dimension 1 or 2
       with consistent dimensions.

    The labels dimension forwarded to the parent class is computed
    by :meth:`parse_dim`: 1 for 1D arrays, the size of the second
    axis for 2D arrays.

    """
    def __init__(self, labels, check=True):
        if check:
            self.check(labels)
        dim = self.parse_dim(labels)

        super(Labels, self).__init__('labels', labels, dim, np.float64, check)

    @staticmethod
    def check(labels):
        """Raise IOError if labels are not correct

        `labels` must be a list of sorted numpy arrays of equal
             dimensions (must be 1D or 2D). In the case of 2D labels,
             the second axis must have the same shape for all labels.

        :param labels: the labels to validate

        :raise IOError: if `labels` is not a list, is empty, contains
            something else than numpy arrays, mixes dimensions, or
            if one of the arrays is not sorted in increasing order.

        """
        # type checking
        if not isinstance(labels, list):
            raise IOError('labels are not in a list')
        if not labels:
            raise IOError('the labels list is empty')
        if not all(isinstance(item, np.ndarray) for item in labels):
            raise IOError('all labels must be numpy arrays')

        # dimension checking
        ndim = labels[0].ndim
        if ndim not in (1, 2):
            raise IOError('labels dimension must be 1 or 2')
        if not all(item.ndim == ndim for item in labels):
            raise IOError('all labels dimensions must be equal')
        if ndim == 2:
            shape1 = labels[0].shape[1]
            if not all(item.shape[1] == shape1 for item in labels):
                raise IOError('all labels must have same shape on 2nd dim')

        # sort checking: an array is sorted when the permutation
        # sorting it is the identity. This only holds with a stable
        # sort, otherwise equal values may be swapped and a sorted
        # array with repeated values would be wrongly rejected.
        # np.lexsort (last column is the primary key) is used for
        # 2D arrays, argsort is asked for a stable algorithm.
        for label in labels:
            index = (np.argsort(label, kind='mergesort') if ndim == 1
                     else np.lexsort(label.T))
            if not np.array_equal(index, np.arange(label.shape[0])):
                raise IOError('labels are not sorted in increasing order')

    @staticmethod
    def parse_dim(labels):
        """Return the labels vectors dimension

        This is 1 when the first array of `labels` is 1D, else the
        size of its second axis. Only the first array is inspected.

        """
        first = labels[0]
        return 1 if first.ndim == 1 else first.shape[1]

    def __eq__(self, other):
        """Return True if `other` has the same name, dim, dtype and data

        Any object lacking one of those attributes compares unequal.

        """
        if self is other:
            return True
        try:
            # check little attributes
            if not (self.name == other.name and
                    self.dim == other.dim and
                    self.dtype == other.dtype and
                    len(self.data) == len(other.data)):
                return False
            # check big data. np.array_equal returns False on arrays
            # of different shapes, where an elementwise == would
            # either broadcast or fail.
            return all(np.array_equal(mine, theirs)
                       for mine, theirs in zip(self.data, other.data))
        except AttributeError:
            return False

    def is_appendable_to(self, group):
        """Return True if the labels can be appended to `group`

        :param group: a mapping giving access, under the key
            `self.name`, to an existing dataset exposing a `shape`.

        :return: True if the dataset is 1D and the labels dimension
            is 1, or if the dataset is 2D, the labels arrays are 2D
            and the dataset second dimension equals the labels
            dimension. False otherwise.

        """
        shape = group[self.name].shape
        if len(shape) == 1:
            return self.dim == 1
        # the second dimension must match as well, because write_to
        # resizes the dataset to (n, self.dim)
        return bool(len(shape) == 2 and
                    self.data[0].ndim == 2 and
                    shape[1] == self.dim)

    def _dim_tuple(self, value):
        """Return a shape-like tuple with `value` on the first axis

        The tuple is (value,) for labels of dimension 1 and
        (value, dim) otherwise.

        """
        return (value,) if self.dim == 1 else (value, self.dim)

    def create_dataset(self, group, per_chunk):
        """Create an empty, resizable labels dataset in `group`

        :param group: the object on which `create_dataset` is called,
            presumably an h5py group.

        :param per_chunk: size of a chunk along the first axis.

        """
        shape = self._dim_tuple(0)
        maxshape = self._dim_tuple(None)
        chunks = self._dim_tuple(per_chunk)

        group.create_dataset(self.name, shape, dtype=self.dtype,
                             chunks=chunks, maxshape=maxshape)

    def write_to(self, group):
        """Append the labels data at the end of the dataset in `group`

        The dataset `group[self.name]` is first enlarged along its
        first axis by the total number of labels, then the new
        labels are written in the space created.

        :param group: the object holding the dataset under the key
            `self.name`, presumably an h5py group.

        """
        nb_data = sum(d.shape[0] for d in self.data)
        nb_group = group[self.name].shape[0]
        new_size = nb_group + nb_data

        if self.dim == 1:
            group[self.name].resize((new_size,))
            # labels of dimension 1 are stored in a 1D dataset but may
            # come as (n, 1) arrays: flatten each of them so that the
            # written data is 1D whatever the number of arrays
            group[self.name][nb_group:] = np.concatenate(
                [np.ravel(d) for d in self.data])
        else:
            group[self.name].resize((new_size, self.dim))
            group[self.name][nb_group:] = np.concatenate(self.data, axis=0)