"""
Supplies MultiDimensionalMapping and NdMapping which are multi-dimensional
map types. The former class only allows indexing whereas the latter
also enables slicing over multiple dimension ranges.
"""
from itertools import cycle
from operator import itemgetter
import numpy as np
import param
from . import util
from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement
from .util import (unique_iterator, sanitize_identifier, dimension_sort,
basestring, wrap_tuple, process_ellipses, get_ndmapping_label, pd)
class item_check(object):
    """
    Context manager that temporarily overrides the class-level
    ``MultiDimensionalMapping._check_items`` flag.

    Entering the context stores the current flag and replaces it with
    the ``enabled`` value supplied to the constructor; leaving the
    context puts the stored flag back. Disabling the checks is only
    appropriate when both keys and values are already known to be of
    the right type, as is the case for many internal operations.
    """

    def __init__(self, enabled):
        # Value the flag takes for the duration of the with-block.
        self.enabled = enabled

    def __enter__(self):
        mapping_type = MultiDimensionalMapping
        # The right-hand side is evaluated first, so the previous flag
        # is remembered before it is overwritten.
        self._enabled, mapping_type._check_items = (
            mapping_type._check_items, self.enabled)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore whatever was active before the context was entered.
        MultiDimensionalMapping._check_items = self._enabled
class sorted_context(object):
    """
    Context manager that temporarily overrides the class-level
    ``MultiDimensionalMapping.sort`` setting.

    Entering the context stores the current setting and replaces it
    with the ``enabled`` value supplied to the constructor; leaving the
    context puts the stored setting back. Switching sorting off can be
    a useful optimization when the items are already known to be in
    sorted order.
    """

    def __init__(self, enabled):
        # Value the sort setting takes for the duration of the with-block.
        self.enabled = enabled

    def __enter__(self):
        mapping_type = MultiDimensionalMapping
        # The right-hand side is evaluated first, so the previous
        # setting is remembered before it is overwritten.
        self._enabled, mapping_type.sort = mapping_type.sort, self.enabled

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore whatever was active before the context was entered.
        MultiDimensionalMapping.sort = self._enabled
class MultiDimensionalMapping(Dimensioned):
    """
    A MultiDimensionalMapping is a Dimensioned mapping (like a
    dictionary or array) that uses fixed-length multidimensional
    keys. This behaves like a sparse N-dimensional array that does not
    require a dense sampling over the multidimensional space.

    If the underlying value for each (key, value) pair also supports
    indexing (such as a dictionary, array, or list), fully qualified
    (deep) indexing may be used from the top level, with the first N
    dimensions of the index selecting a particular Dimensioned object
    and the remaining dimensions indexing into that object.

    For instance, for a MultiDimensionalMapping with dimensions "Year"
    and "Month" and underlying values that are 2D floating-point
    arrays indexed by (r,c), a 2D array may be indexed with x[2000,3]
    and a single floating-point number may be indexed as
    x[2000,3,1,9].

    In practice, this class is typically only used as an abstract base
    class, because the NdMapping subclass extends it with a range of
    useful slicing methods for selecting subsets of the data. Even so,
    keeping the slicing support separate from the indexing and data
    storage methods helps make both classes easier to understand.
    """

    group = param.String(default='MultiDimensionalMapping', constant=True)

    kdims = param.List(default=[Dimension("Default")], constant=True)

    vdims = param.List(default=[], bounds=(0, 0), constant=True)

    sort = param.Boolean(default=True, doc="""
        Whether the items should be sorted in the constructor.""")

    data_type = None          # Optional type checking of elements
    _deep_indexable = False   # Whether extra indices are passed on to the values
    _check_items = True       # Class-wide switch toggled by item_check

    def __init__(self, initial_items=None, kdims=None, **params):
        """
        Builds the mapping from ``initial_items``, which may be None,
        a single ``(key, value)`` tuple, a dictionary, another
        MultiDimensionalMapping or anything accepted by the
        OrderedDict constructor. ``kdims`` overrides the key
        dimensions and any remaining keywords are passed on as
        parameters.

        Raises a ValueError if a plain ``dict`` is supplied while
        ``sort`` is False.
        """
        if isinstance(initial_items, MultiDimensionalMapping):
            # Inherit the parameters of the supplied mapping; explicit
            # keywords take precedence, then the current sort setting.
            params = dict(util.get_param_values(initial_items),
                          **dict({'sort': self.sort}, **params))
        if kdims is not None:
            params['kdims'] = kdims
        super(MultiDimensionalMapping, self).__init__(OrderedDict(), **dict(params))

        # Exact type test on purpose: an OrderedDict is a dict subclass
        # but does define an ordering and therefore has to pass.
        if type(initial_items) is dict and not self.sort:
            raise ValueError('If sort=False the data must define a fixed '
                             'ordering, please supply a list of items or '
                             'an OrderedDict, not a regular dictionary.')

        self._next_ind = 0
        self._check_key_type = True

        if initial_items is None:
            initial_items = []

        if isinstance(initial_items, tuple):
            # A bare tuple is interpreted as one (key, value) pair.
            self._add_item(initial_items[0], initial_items[1])
        elif not self._check_items:
            # Fast path: store the items directly, skipping per-item
            # validation and key typing.
            if isinstance(initial_items, dict):
                initial_items = initial_items.items()
            elif isinstance(initial_items, MultiDimensionalMapping):
                initial_items = initial_items.data.items()
            self.data = OrderedDict((k if isinstance(k, tuple) else (k,), v)
                                    for k, v in initial_items)
            if self.sort:
                self._resort()
        else:
            self.update(OrderedDict(initial_items))

    def _item_check(self, dim_vals, data):
        """
        Applies optional checks to individual data elements before
        they are inserted, ensuring that they are of a certain
        type. Subclasses may implement further element restrictions.

        Raises a TypeError if ``data_type`` is set and ``data`` is not
        an instance of it, and otherwise a KeyError if the key length
        differs from the number of key dimensions.
        """
        if self.data_type is not None and not isinstance(data, self.data_type):
            if isinstance(self.data_type, tuple):
                data_type = tuple(dt.__name__ for dt in self.data_type)
            else:
                data_type = self.data_type.__name__
            raise TypeError('{slf} does not accept {data} type, data elements have '
                            'to be a {restr}.'.format(slf=type(self).__name__,
                                                      data=type(data).__name__,
                                                      restr=data_type))
        elif not len(dim_vals) == self.ndims:
            raise KeyError('Key has to match number of dimensions.')

    def _add_item(self, dim_vals, data, sort=True, update=True):
        """
        Adds item to the data, applying dimension types and ensuring
        key conforms to Dimension type and values.

        The mapping is only re-sorted if both ``sort`` and the ``sort``
        parameter are true. With ``update`` enabled, an existing value
        that is a MultiDimensionalMapping or OrderedDict is updated in
        place rather than replaced.
        """
        sort = sort and self.sort
        if not isinstance(dim_vals, tuple):
            dim_vals = (dim_vals,)

        self._item_check(dim_vals, data)

        # Apply dimension types; a component is left untouched when
        # either the dimension type or the value itself is None.
        dim_types = zip([kd.type for kd in self.kdims], dim_vals)
        dim_vals = tuple(v if None in [t, v] else t(v) for t, v in dim_types)
        valid_vals = zip(self.kdims, dim_vals)

        for dim, val in valid_vals:
            if dim.values and val is not None and val not in dim.values:
                raise KeyError('%s dimension value %s not in'
                               ' specified dimension values.' % (dim, repr(val)))

        # Updates nested data structures rather than simply overriding them.
        if (update and (dim_vals in self.data)
                and isinstance(self.data[dim_vals], (MultiDimensionalMapping, OrderedDict))):
            self.data[dim_vals].update(data)
        else:
            self.data[dim_vals] = data

        if sort:
            self._resort()

    def _apply_key_type(self, keys):
        """
        If a type is specified by the corresponding key dimension,
        this method applies the type to the supplied key.

        Slices have the type applied to each of their non-None
        components, lists to each of their elements and Ellipsis is
        passed through unchanged.
        """
        typed_key = ()
        for dim, key in zip(self.kdims, keys):
            key_type = dim.type
            if key_type is None:
                typed_key += (key,)
            elif isinstance(key, slice):
                sl_vals = [key.start, key.stop, key.step]
                typed_key += (slice(*[key_type(el) if el is not None else None
                                      for el in sl_vals]),)
            elif key is Ellipsis:
                typed_key += (key,)
            elif isinstance(key, list):
                typed_key += ([key_type(k) for k in key],)
            else:
                typed_key += (key_type(key),)
        return typed_key

    def _split_index(self, key):
        """
        Partitions key into key and deep dimension groups. If only key
        indices are supplied, the data is indexed with an empty tuple.
        Keys with fewer indices than there are dimensions will be
        padded with full slices.
        """
        if not isinstance(key, tuple):
            key = (key,)
        elif key == ():
            return (), ()

        if key[0] is Ellipsis:
            # A leading Ellipsis stands in for as many full slices as
            # are needed to reach ndims components.
            num_pad = self.ndims - len(key) + 1
            key = (slice(None),) * num_pad + key[1:]
        elif len(key) < self.ndims:
            num_pad = self.ndims - len(key)
            key = key + (slice(None),) * num_pad

        map_slice = key[:self.ndims]
        if self._check_key_type:
            map_slice = self._apply_key_type(map_slice)
        if len(key) == self.ndims:
            return map_slice, ()
        else:
            return map_slice, key[self.ndims:]

    def _dataslice(self, data, indices):
        """
        Returns slice of data element if the item is deep
        indexable. Warns if attempting to slice an object that has not
        been declared deep indexable; in that case the element is
        returned unchanged.
        """
        if self._deep_indexable and isinstance(data, Dimensioned) and indices:
            return data[indices]
        elif len(indices) > 0:
            self.warning('Cannot index into data element, extra data'
                         ' indices ignored.')
        return data

    def _resort(self):
        """
        Replaces the stored data with an OrderedDict built from the
        result of dimension_sort over all key dimensions.
        """
        self.data = OrderedDict(dimension_sort(self.data, self.kdims, self.vdims,
                                               range(self.ndims)))

    def clone(self, data=None, shared_data=True, *args, **overrides):
        """
        Overrides Dimensioned clone to avoid checking items if data
        is unchanged. Items are only checked when ``shared_data`` is
        False and item checking is currently enabled.
        """
        with item_check(not shared_data and self._check_items):
            return super(MultiDimensionalMapping, self).clone(data, shared_data,
                                                              *args, **overrides)

    def groupby(self, dimensions, container_type=None, group_type=None, **kwargs):
        """
        Splits the mapping into groups by key dimension which are then
        returned together in a mapping of class container_type. The
        individual groups are of the same type as the original map.
        This operation will always sort the groups and the items in
        each group.

        A mapping with a single key dimension cannot be split; a
        warning is issued and the mapping itself is returned.
        """
        if self.ndims == 1:
            self.warning('Cannot split Map with only one dimension.')
            return self

        container_type = container_type if container_type else type(self)
        group_type = group_type if group_type else type(self)
        dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
        with item_check(False):
            return util.ndmapping_groupby(self, dimensions, container_type,
                                          group_type, sort=True, **kwargs)

    def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
        """
        Create a new object with an additional key dimension.
        Requires the dimension name or object, the desired position
        in the key dimensions and a key value scalar or sequence of
        the same length as the existing keys.

        Raises an Exception if the dimension already exists or if a
        value dimension is requested on a deep indexable object, and a
        ValueError if a sequence of values has the wrong length.
        """
        if not isinstance(dimension, Dimension):
            dimension = Dimension(dimension)

        if dimension in self.dimensions():
            raise Exception('{dim} dimension already defined'.format(dim=dimension.name))

        if vdim and self._deep_indexable:
            raise Exception('Cannot add value dimension to object that is deep indexable')

        if vdim:
            dims = self.vdims[:]
            dims.insert(dim_pos, dimension)
            dimensions = dict(vdims=dims)
            # NOTE(review): the offset position is later used to insert
            # into each value sequence; confirm this is intended.
            dim_pos += self.ndims
        else:
            dims = self.kdims[:]
            dims.insert(dim_pos, dimension)
            dimensions = dict(kdims=dims)

        if isinstance(dim_val, basestring) or not hasattr(dim_val, '__iter__'):
            # A scalar value is repeated for every existing key.
            dim_val = cycle([dim_val])
        else:
            if not len(dim_val) == len(self):
                # Fixed: the two literals previously ran together as
                # "same lengthas existing keys.".
                raise ValueError("Added dimension values must be same length "
                                 "as existing keys.")

        items = OrderedDict()
        for dval, (key, val) in zip(dim_val, self.data.items()):
            if vdim:
                new_val = list(val)
                new_val.insert(dim_pos, dval)
                items[key] = tuple(new_val)
            else:
                new_key = list(key)
                new_key.insert(dim_pos, dval)
                items[tuple(new_key)] = val

        return self.clone(items, **dict(dimensions, **kwargs))

    def drop_dimension(self, dimensions):
        """
        Returns a new mapping with the named dimension(s) removed.

        ``dimensions`` may be a single dimension (name or Dimension
        object) or a sequence of them.
        """
        # np.isscalar does not recognise Dimension objects, so a single
        # Dimension has to be wrapped explicitly.
        if np.isscalar(dimensions) or isinstance(dimensions, Dimension):
            dimensions = [dimensions]
        dims = [d for d in self.kdims if d not in dimensions]
        dim_inds = [self.get_dimension_index(d) for d in dims]
        # Always build tuple keys: itemgetter would return a bare
        # element for a single remaining dimension (ambiguous when that
        # element is itself a tuple) and fails when none remain.
        return self.clone([(tuple(k[i] for i in dim_inds), v)
                           for k, v in self.data.items()],
                          kdims=dims)

    def dimension_values(self, dimension, expanded=True, flat=True):
        """
        Returns the values along the specified dimension.

        For a key dimension the corresponding key component of every
        item is returned. For any other dimension of the mapping the
        values of the contained elements are concatenated and, if
        ``expanded`` is False, reduced to their unique values.
        """
        dimension = self.get_dimension(dimension, strict=True)
        if dimension in self.kdims:
            # The index does not change between keys, look it up once.
            dim_index = self.get_dimension_index(dimension)
            return np.array([k[dim_index] for k in self.data.keys()])
        if dimension in self.dimensions():
            values = [el.dimension_values(dimension) for el in self
                      if dimension in el.dimensions()]
            vals = np.concatenate(values)
            return vals if expanded else util.unique_array(vals)
        else:
            return super(MultiDimensionalMapping, self).dimension_values(dimension, expanded, flat)

    def reindex(self, kdims=None, force=False):
        """
        Create a new object with a re-ordered or reduced set of key
        dimensions.

        Reducing the number of key dimensions will discard information
        from the keys. All data values are accessible in the newly
        created object as the new labels must be sufficient to address
        each value uniquely.

        If no ``kdims`` are supplied, every key dimension with exactly
        one distinct value is dropped. Unless ``force`` is set, an
        Exception is raised when the new keys are not unique.
        """
        old_kdims = [d.name for d in self.kdims]
        # None replaces the former mutable default of []; an explicitly
        # supplied empty sequence is still treated the same way.
        if kdims is None or not len(kdims):
            kdims = [d for d in old_kdims
                     if not len(set(self.dimension_values(d))) == 1]
        indices = [self.get_dimension_index(el) for el in kdims]

        keys = [tuple(k[i] for i in indices) for k in self.data.keys()]
        reindexed_items = OrderedDict(zip(keys, self.data.values()))
        reduced_dims = set(old_kdims).difference(kdims)
        dimensions = [self.get_dimension(d) for d in kdims
                      if d not in reduced_dims]

        if len(set(keys)) != len(keys) and not force:
            # Fixed: the two literals previously ran together as
            # "not sufficientto address".
            raise Exception("Given dimension labels not sufficient "
                            "to address all values uniquely")

        if len(keys):
            # Dropped dimensions are passed on as cdims, using the
            # first value found along each of them.
            cdims = {self.get_dimension(d): self.dimension_values(d)[0]
                     for d in reduced_dims}
        else:
            cdims = {}
        # Items are only re-checked if the dimension order was changed.
        with item_check(indices == sorted(indices)):
            return self.clone(reindexed_items, kdims=dimensions,
                              cdims=cdims)

    @property
    def last(self):
        "Returns the last stored data item, or None if the mapping is empty."
        return list(self.data.values())[-1] if len(self) else None

    @property
    def last_key(self):
        "Returns the last key value, or None if the mapping is empty."
        return list(self.keys())[-1] if len(self) else None

    @property
    def info(self):
        """
        Prints information about the Dimensioned object, including the
        number and type of objects contained within it and information
        about its dimensions. Nothing is returned.
        """
        if (len(self.values()) > 0):
            info_str = self.__class__.__name__ +\
                " containing %d items of type %s\n" % (len(self.keys()),
                                                     type(self.values()[0]).__name__)
        else:
            info_str = self.__class__.__name__ + " containing no items\n"
        # Underline the heading (the trailing newline is not counted).
        info_str += ('-' * (len(info_str)-1)) + "\n\n"
        aliases = {v: k for k, v in self._dim_aliases.items()}
        for group in self._dim_groups:
            dimensions = getattr(self, group)
            if dimensions:
                group = aliases[group].split('_')[0]
                info_str += '%s Dimensions: \n' % group.capitalize()
            for d in dimensions:
                dmin, dmax = self.range(d.name)
                if d.value_format:
                    dmin, dmax = d.value_format(dmin), d.value_format(dmax)
                info_str += '\t %s: %s...%s \n' % (d.pprint_label, dmin, dmax)
        print(info_str)

    def table(self, datatype=None, **kwargs):
        """
        Creates a table from the stored keys and data.

        Each value is converted with its own ``table`` method, the key
        components are added to it as dimensions and the resulting
        tables are concatenated. Raises a ValueError if the mapping is
        empty, as there is then nothing to build a table from.
        """
        if datatype is None:
            datatype = ['dataframe' if pd else 'dictionary']

        tables = []
        for key, value in self.data.items():
            value = value.table(datatype=datatype, **kwargs)
            for idx, (dim, val) in enumerate(zip(self.kdims, key)):
                value = value.add_dimension(dim, idx, val)
            tables.append(value)

        if not tables:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError('Cannot create a table from an empty %s.'
                             % type(self).__name__)
        return tables[-1].interface.concatenate(tables)

    def dframe(self):
        """
        Creates a pandas DataFrame from the stored keys and data, with
        one column per key dimension and one column named after the
        group holding the values. Raises an Exception if pandas is
        not available.
        """
        try:
            import pandas
        except ImportError:
            raise Exception("Cannot build a DataFrame without the pandas library.")
        labels = self.dimensions('key', True) + [self.group]
        return pandas.DataFrame(
            [dict(zip(labels, k + (v,))) for (k, v) in self.data.items()])

    def update(self, other):
        """
        Updates the current mapping with some other mapping or
        OrderedDict instance, making sure that they are indexed along
        the same set of dimensions. The order of key dimensions remains
        unchanged after the update.

        Raises a KeyError if ``other`` is an NdMapping that shares no
        key dimension with this mapping.
        """
        if isinstance(other, NdMapping):
            dims = [d for d in other.kdims if d not in self.kdims]
            if len(dims) == other.ndims:
                raise KeyError("Cannot update with NdMapping that has"
                               " a different set of key dimensions.")
            elif dims:
                other = other.drop_dimension(dims)
            other = other.data
        # Sort once at the end instead of after every inserted item.
        for key, data in other.items():
            self._add_item(key, data, sort=False)
        if self.sort:
            self._resort()

    def keys(self):
        """
        Returns the keys of all the elements. Keys of one-dimensional
        mappings are unpacked from their 1-tuples.
        """
        if self.ndims == 1:
            return [k[0] for k in self.data.keys()]
        else:
            return list(self.data.keys())

    def values(self):
        "Returns the values of all the elements."
        return list(self.data.values())

    def items(self):
        "Returns all elements as a list in (key,value) format."
        return list(zip(list(self.keys()), list(self.values())))

    def get(self, key, default=None):
        """
        Standard get semantics for all mapping types. A key of None
        always yields None; ``default`` is returned if the lookup
        raises a KeyError.
        """
        try:
            if key is None:
                return None
            return self[key]
        except KeyError:
            return default

    def pop(self, key, default=None):
        "Standard pop semantics for all mapping types"
        if not isinstance(key, tuple):
            key = (key,)
        return self.data.pop(key, default)

    def __getitem__(self, key):
        """
        Allows multi-dimensional indexing in the order of the
        specified key dimensions, passing any additional indices to
        the data elements.
        """
        if key in [Ellipsis, ()]:
            return self
        map_slice, data_slice = self._split_index(key)
        return self._dataslice(self.data[map_slice], data_slice)

    def __setitem__(self, key, value):
        "Stores the value under the key, replacing any existing value."
        self._add_item(key, value, update=False)

    def __str__(self):
        return repr(self)

    def __iter__(self):
        "Iterates over the values (not the keys) of the mapping."
        return iter(self.values())

    def __contains__(self, key):
        """
        Tests key membership. For one-dimensional mappings the key is
        compared against the stored tuple keys, otherwise against the
        result of keys().
        """
        if self.ndims == 1:
            return key in self.data.keys()
        else:
            return key in self.keys()

    def __len__(self):
        "Returns the number of stored items."
        return len(self.data)
class NdMapping(MultiDimensionalMapping):
    """
    NdMapping supports the same indexing semantics as
    MultiDimensionalMapping but also supports slicing semantics.

    Slicing semantics on an NdMapping is dependent on the ordering
    semantics of the keys. As MultiDimensionalMapping sorts the keys, a
    slice on an NdMapping is effectively a way of filtering out the
    keys that are outside the slice range.
    """

    group = param.String(default='NdMapping', constant=True)

    def __getitem__(self, indexslice):
        """
        Allows slicing operations along the key and data
        dimensions. If no data slice is supplied it will return all
        data elements, otherwise it will return the requested slice of
        the data.

        A boolean NumPy array selects the items at the True positions
        and has to match the length of the mapping (IndexError
        otherwise). A KeyError is raised if a slice selects no items.
        """
        if isinstance(indexslice, np.ndarray) and indexslice.dtype.kind == 'b':
            if not len(indexslice) == len(self):
                raise IndexError("Boolean index must match length of sliced object")
            selection = zip(indexslice, self.data.items())
            return self.clone([item for c, item in selection if c])
        elif indexslice == () and not self.kdims:
            # Without key dimensions the single item is stored under ().
            return self.data[()]
        elif indexslice in [Ellipsis, ()]:
            return self
        elif Ellipsis in wrap_tuple(indexslice):
            indexslice = process_ellipses(self, indexslice)

        map_slice, data_slice = self._split_index(indexslice)
        map_slice = self._transform_indices(map_slice)
        map_slice = self._expand_slice(map_slice)

        if all(not (isinstance(el, (slice, set, list, tuple)) or callable(el))
               for el in map_slice):
            # Every component is a plain value: direct key lookup.
            return self._dataslice(self.data[map_slice], data_slice)
        else:
            conditions = self._generate_conditions(map_slice)
            items = self.data.items()
            # Filter the items one key dimension at a time. Dimensions
            # declaring explicit values are compared by position within
            # those values rather than by the key value itself.
            for cidx, (condition, dim) in enumerate(zip(conditions, self.kdims)):
                values = dim.values
                items = [(k, v) for k, v in items
                         if condition(values.index(k[cidx])
                                      if values else k[cidx])]
            sliced_items = []
            for k, v in items:
                val_slice = self._dataslice(v, data_slice)
                # Falsy slices are dropped unless they are tuples.
                if val_slice or isinstance(val_slice, tuple):
                    sliced_items.append((k, val_slice))
            if len(sliced_items) == 0:
                raise KeyError('No items within specified slice.')
            with item_check(False):
                return self.clone(sliced_items)

    def _expand_slice(self, indices):
        """
        Expands slices containing steps into a set of the matching key
        values; all other indices are passed through unchanged.
        """
        keys = list(self.data.keys())
        expanded = []
        for idx, ind in enumerate(indices):
            if isinstance(ind, slice) and ind.step is not None:
                # Select the values inside the start/stop bounds first,
                # the step is applied positionally afterwards.
                dim_ind = slice(ind.start, ind.stop)
                if dim_ind == slice(None):
                    condition = self._all_condition()
                elif dim_ind.start is None:
                    condition = self._upto_condition(dim_ind)
                elif dim_ind.stop is None:
                    condition = self._from_condition(dim_ind)
                else:
                    condition = self._range_condition(dim_ind)
                dim_vals = unique_iterator(k[idx] for k in keys)
                expanded.append(set([k for k in dim_vals if condition(k)][::int(ind.step)]))
            else:
                expanded.append(ind)
        return tuple(expanded)

    def _transform_indices(self, indices):
        """
        Identity function here but subclasses can implement transforms
        of the dimension indices from one coordinate system to another.
        """
        return indices

    def _generate_conditions(self, map_slice):
        """
        Generates filter conditions used for slicing the data structure.

        Returns one predicate per key dimension. For dimensions that
        declare explicit values the predicates operate on positions
        within those values. Raises an IndexError if a tuple is used
        to select keys.
        """
        conditions = []
        for dim, dim_slice in zip(self.kdims, map_slice):
            if isinstance(dim_slice, slice):
                start, stop = dim_slice.start, dim_slice.stop
                if dim.values:
                    values = dim.values
                    dim_slice = slice(None if start is None else values.index(start),
                                      None if stop is None else values.index(stop))
                if dim_slice == slice(None):
                    conditions.append(self._all_condition())
                elif start is None:
                    conditions.append(self._upto_condition(dim_slice))
                elif stop is None:
                    conditions.append(self._from_condition(dim_slice))
                else:
                    conditions.append(self._range_condition(dim_slice))
            elif isinstance(dim_slice, (set, list)):
                if dim.values:
                    dim_slice = [dim.values.index(dim_val)
                                 for dim_val in dim_slice]
                conditions.append(self._values_condition(dim_slice))
            elif dim_slice is Ellipsis:
                conditions.append(self._all_condition())
            elif callable(dim_slice):
                # Callables are used directly as the predicate.
                conditions.append(dim_slice)
            elif isinstance(dim_slice, (tuple)):
                raise IndexError("Keys may only be selected with sets or lists, not tuples.")
            else:
                if dim.values:
                    dim_slice = dim.values.index(dim_slice)
                conditions.append(self._value_condition(dim_slice))
        return conditions

    def _value_condition(self, value):
        "Returns a predicate that is true for values equal to ``value``."
        return lambda x: x == value

    def _values_condition(self, values):
        "Returns a predicate that is true for values contained in ``values``."
        return lambda x: x in values

    def _range_condition(self, slice):
        """
        Returns a predicate for the half-open range [start, stop). If
        the slice has a step, values additionally have to lie a whole
        number of steps away from the start.
        """
        if slice.step is None:
            lmbd = lambda x: slice.start <= x < slice.stop
        else:
            lmbd = lambda x: slice.start <= x < slice.stop and not (
                (x-slice.start) % slice.step)
        return lmbd

    def _upto_condition(self, slice):
        """
        Returns a predicate for values below the stop. If the slice has
        a step, values additionally have to be a multiple of the step.
        """
        if slice.step is None:
            lmbd = lambda x: x < slice.stop
        else:
            lmbd = lambda x: x < slice.stop and not (x % slice.step)
        return lmbd

    def _from_condition(self, slice):
        """
        Returns a predicate for values at or above the start. If the
        slice has a step, values additionally have to lie a whole
        number of steps away from the start.
        """
        if slice.step is None:
            lmbd = lambda x: x >= slice.start
        else:
            # Fixed: the remainder test was missing its negation and so
            # selected exactly the values off the step grid, unlike the
            # range and upto conditions.
            lmbd = lambda x: x >= slice.start and not (
                (x-slice.start) % slice.step)
        return lmbd

    def _all_condition(self):
        "Returns a predicate that accepts every value."
        return lambda x: True