import numpy as np
from ..util import max_range
from .interface import Interface, DataError
class MultiInterface(Interface):
"""
MultiInterface allows wrapping around a list of tabular datasets
including dataframes, the columnar dictionary format or 2D tabular
NumPy arrays. Using the split method the list of tabular data can
be split into individual datasets.
The interface makes the data appear a list of tabular datasets as
a single dataset. The length, shape and values methods therefore
make the data appear like a single array of concatenated subpaths,
separated by NaN values.
"""
types = ()
datatype = 'multitabular'
subtypes = ['dictionary', 'dataframe', 'array', 'dask']
multi = True
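
    # Illustrative usage sketch (hypothetical `path` element; assumes the
    # HoloViews API dispatches list data to this interface, as hv.Path
    # does for lists of tabular subpaths):
    #
    #   import numpy as np
    #   import holoviews as hv
    #   subpaths = [np.array([(0., 0.), (1., 1.)]),
    #               np.array([(2., 2.), (3., 3.)])]
    #   path = hv.Path(subpaths)  # each array becomes one subpath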

    @classmethod
def init(cls, eltype, data, kdims, vdims):
new_data = []
dims = {'kdims': eltype.kdims, 'vdims': eltype.vdims}
if kdims is not None:
dims['kdims'] = kdims
if vdims is not None:
dims['vdims'] = vdims
if not isinstance(data, list):
            raise ValueError('MultiInterface data must be a list of tabular data types.')
prev_interface, prev_dims = None, None
for d in data:
d, interface, dims, _ = Interface.initialize(eltype, d, kdims, vdims,
datatype=cls.subtypes)
if prev_interface:
if prev_interface != interface:
raise DataError('MultiInterface subpaths must all have matching datatype.', cls)
if dims['kdims'] != prev_dims['kdims']:
raise DataError('MultiInterface subpaths must all have matching kdims.', cls)
if dims['vdims'] != prev_dims['vdims']:
raise DataError('MultiInterface subpaths must all have matching vdims.', cls)
new_data.append(d)
prev_interface, prev_dims = interface, dims
return new_data, dims, {}

    @classmethod
def validate(cls, dataset, vdims=True):
if not dataset.data:
return
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
ds.interface.validate(ds, vdims)

    @classmethod
def _inner_dataset_template(cls, dataset):
"""
Returns a Dataset template used as a wrapper around the data
contained within the multi-interface dataset.
"""
from . import Dataset
vdims = dataset.vdims if getattr(dataset, 'level', None) is None else []
return Dataset(dataset.data[0], datatype=cls.subtypes,
kdims=dataset.kdims, vdims=vdims)
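
    # Illustrative sketch of how the template is used throughout this
    # class: one wrapper Dataset is created per operation and reused for
    # every subpath:
    #
    #   ds = cls._inner_dataset_template(dataset)
    #   for d in dataset.data:
    #       ds.data = d               # swap in subpath without re-initializing
    #       ds.interface.length(ds)   # delegate to the tabular interface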

    @classmethod
def dimension_type(cls, dataset, dim):
if not dataset.data:
# Note: Required to make empty datasets work at all (should fix)
# Other interfaces declare equivalent of empty array
# which defaults to float type
return float
ds = cls._inner_dataset_template(dataset)
return ds.interface.dimension_type(ds, dim)

    @classmethod
def range(cls, dataset, dim):
if not dataset.data:
return (None, None)
ranges = []
ds = cls._inner_dataset_template(dataset)
# Backward compatibility for Contours/Polygons level
level = getattr(dataset, 'level', None)
dim = dataset.get_dimension(dim)
if level is not None and dim is dataset.vdims[0]:
return (level, level)
for d in dataset.data:
ds.data = d
ranges.append(ds.interface.range(ds, dim))
return max_range(ranges)
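
    # For example, per-subpath ranges (0, 1) and (2, 3) combine via
    # max_range into the overall range (0, 3).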

    @classmethod
def isscalar(cls, dataset, dim):
"""
        Tests whether the dimension is scalar in each subpath.
"""
if not dataset.data:
return True
ds = cls._inner_dataset_template(dataset)
isscalar = []
for d in dataset.data:
ds.data = d
isscalar.append(ds.interface.isscalar(ds, dim))
return all(isscalar)

    @classmethod
def select(cls, dataset, selection_mask=None, **selection):
"""
Applies selectiong on all the subpaths.
"""
if not self.dataset.data:
return []
ds = cls._inner_dataset_template(dataset)
data = []
for d in dataset.data:
ds.data = d
sel = ds.interface.select(ds, **selection)
data.append(sel)
return data
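
    # Illustrative sketch (hypothetical `path` element from above):
    # select filters each subpath independently and returns the list
    # of filtered subpath data, e.g.
    #
    #   path.select(x=(0, 1.5))  # clips every subpath to the x-range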

    @classmethod
def select_paths(cls, dataset, selection):
"""
Allows selecting paths with usual NumPy slicing index.
"""
return [s[0] for s in np.array([{0: p} for p in dataset.data])[selection]]
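
    # Illustrative sketch: select_paths picks out whole subpaths, e.g.
    # every other subpath of a dataset `ds` backed by this interface:
    #
    #   MultiInterface.select_paths(ds, slice(None, None, 2))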

    @classmethod
def aggregate(cls, columns, dimensions, function, **kwargs):
raise NotImplementedError('Aggregation currently not implemented')

    @classmethod
def groupby(cls, columns, dimensions, container_type, group_type, **kwargs):
raise NotImplementedError('Grouping currently not implemented')

    @classmethod
def sample(cls, columns, samples=[]):
raise NotImplementedError('Sampling operation on subpaths not supported')

    @classmethod
def shape(cls, dataset):
"""
Returns the shape of all subpaths, making it appear like a
single array of concatenated subpaths separated by NaN values.
"""
if not dataset.data:
return (0, len(dataset.dimensions()))
rows, cols = 0, 0
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
r, cols = ds.interface.shape(ds)
rows += r
        # Account for one NaN-separator row between consecutive subpaths
        return rows + len(dataset.data) - 1, cols

    @classmethod
def length(cls, dataset):
"""
Returns the length of the multi-tabular dataset making it appear
like a single array of concatenated subpaths separated by NaN
values.
"""
if not dataset.data:
return 0
length = 0
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
length += ds.interface.length(ds)
        # Account for one NaN separator between consecutive subpaths
        return length + len(dataset.data) - 1
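
    # Worked example: two subpaths of 10 and 5 rows give length
    # 10 + 5 + (2 - 1) == 16 and shape (16, cols), since one NaN row
    # separates each pair of consecutive subpaths.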

    @classmethod
def nonzero(cls, dataset):
return bool(dataset.data)

    @classmethod
def redim(cls, dataset, dimensions):
if not dataset.data:
return dataset.data
new_data = []
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
new_data.append(ds.interface.redim(ds, dimensions))
return new_data

    @classmethod
def values(cls, dataset, dimension, expanded, flat):
"""
Returns a single concatenated array of all subpaths separated
        by NaN values. If the expanded keyword is False an array of
        arrays is returned.
"""
if not dataset.data:
return np.array([])
values = []
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
dvals = ds.interface.values(ds, dimension, expanded, flat)
if not len(dvals):
continue
elif expanded:
values.append(dvals)
                values.append([np.nan])
else:
values.append(dvals)
        if not values:
            return np.array([])
        elif expanded:
            # Drop the NaN separator trailing the final subpath
            return np.concatenate(values[:-1])
        else:
            # Unexpanded: return an array of per-subpath value arrays
            return np.array(values)
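
    # Illustrative sketch (hypothetical `path` element from above,
    # assuming dimension_values routes through this method):
    #
    #   path.dimension_values('x')  # -> array([0., 1., nan, 2., 3.])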

    @classmethod
def split(cls, dataset, start, end, datatype, **kwargs):
"""
Splits a multi-interface Dataset into regular Datasets using
regular tabular interfaces.
"""
objs = []
if datatype is None:
            for d in dataset.data[start:end]:
objs.append(dataset.clone(d, datatype=cls.subtypes))
return objs
elif not dataset.data:
return objs
ds = cls._inner_dataset_template(dataset)
for d in dataset.data:
ds.data = d
if datatype == 'array':
obj = ds.array(**kwargs)
elif datatype == 'dataframe':
obj = ds.dframe(**kwargs)
elif datatype == 'columns':
if ds.interface.datatype == 'dictionary':
obj = dict(d)
else:
obj = ds.columns(**kwargs)
else:
raise ValueError("%s datatype not support" % datatype)
objs.append(obj)
return objs
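
    # Illustrative sketch: splitting back into one object per subpath,
    # e.g. as pandas DataFrames (hypothetical `path` element from above;
    # assumes pandas is available):
    #
    #   MultiInterface.split(path, None, None, 'dataframe')
    #   # -> [DataFrame for subpath 1, DataFrame for subpath 2]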

Interface.register(MultiInterface)