Module hub.api.tensor
Expand source code
from typing import Union
import numpy as np
from hub.core.chunk_engine.read import read_array, read_tensor_meta
from hub.core.chunk_engine.write import write_array
from hub.core.typing import StorageProvider
from hub.util.slice import merge_slices
class Tensor:
    """View over one tensor held by a storage provider.

    A ``Tensor`` stores the tensor's key, the provider it is read from and
    written to, and a slice restricting which samples of the primary axis
    this view covers. Indexing returns a new, narrower ``Tensor`` view;
    the data itself is only read by :meth:`numpy`.
    """

    def __init__(
        self,
        key: str,
        provider: StorageProvider,
        tensor_slice: slice = slice(None),
    ):
        """Initialize a new tensor.

        Note:
            This operation does not create a new tensor in the storage provider,
            and should normally only be performed by Hub internals.

        Args:
            key (str): The internal identifier for this tensor.
            provider (StorageProvider): The storage provider for the parent dataset.
            tensor_slice (slice): The slice object restricting the view of this tensor.
        """
        self.key = key
        self.provider = provider
        self.slice = tensor_slice

        self.load_meta()

    def load_meta(self):
        """Read the tensor metadata and cache ``num_samples`` and ``shape``."""
        meta = read_tensor_meta(self.key, self.provider)
        self.num_samples = meta["length"]
        self.shape = meta["max_shape"]

    def __len__(self):
        """Return the number of samples along the primary axis of this view.

        For an unsliced tensor this is ``num_samples``; for a sliced view it
        is the number of positions ``self.slice`` selects out of
        ``num_samples``.
        """
        # slice.indices clamps the slice to the stored length, so the
        # resulting range has exactly as many items as the view selects.
        return len(range(*self.slice.indices(self.num_samples)))

    @staticmethod
    def _as_slice(item: Union[int, slice]) -> slice:
        """Normalize an index into a slice selecting the same samples.

        Args:
            item: An integer (Python or NumPy) or a slice.

        Returns:
            ``item`` itself when it is a slice, otherwise a one-sample slice.

        Raises:
            TypeError: If ``item`` is neither an integer nor a slice.
        """
        if isinstance(item, slice):
            return item
        if isinstance(item, (int, np.integer)):
            index = int(item)
            # index + 1 is 0 when index == -1, and slice(-1, 0) selects
            # nothing; `or None` leaves the stop open so the last sample
            # is selected instead.
            return slice(index, index + 1 or None)
        raise TypeError(
            f"Tensor indices must be integers or slices, not {type(item).__name__}"
        )

    def __getitem__(self, item: Union[int, slice]):
        """Return a new ``Tensor`` view narrowed by ``item``.

        No data is read; the returned view shares this tensor's key and
        provider.

        Raises:
            TypeError: If ``item`` is neither an integer nor a slice.
        """
        # NOTE(review): assumes merge_slices(existing, new) applies `new`
        # relative to `existing` -- confirm against hub.util.slice.
        new_slice = merge_slices(self.slice, self._as_slice(item))
        return Tensor(self.key, self.provider, new_slice)

    def __setitem__(self, item: Union[int, slice], value: np.ndarray):
        """Write ``value`` as the contents of this tensor.

        Only assignment to the whole tensor (``tensor[:] = value`` on an
        unsliced tensor) is supported.

        Raises:
            NotImplementedError: If the target is a proper slice of the tensor.
            TypeError: If ``item`` is neither an integer nor a slice.
        """
        # Merge the slices directly rather than building a throwaway Tensor,
        # whose constructor would read the metadata from storage.
        target_slice = merge_slices(self.slice, self._as_slice(item))
        if target_slice != slice(None):
            raise NotImplementedError(
                "Assignment to Tensor slices not currently supported!"
            )

        write_array(
            array=value,
            key=self.key,
            storage=self.provider,
            batched=True,
        )
        # The write may change the stored length/shape; refresh the cache.
        self.load_meta()

    def __iter__(self):
        """Yield a one-sample ``Tensor`` view for each position in this view."""
        for i in range(len(self)):
            yield self[i]

    def numpy(self):
        """Compute the contents of this tensor in numpy format.

        Returns:
            A numpy array containing the data represented by this tensor.
        """
        return read_array(self.key, self.provider, self.slice)
Classes
class Tensor (key: str, provider: StorageProvider, tensor_slice: slice = slice(None, None, None))
-
Initialize a new tensor.
Note
This operation does not create a new tensor in the storage provider, and should normally only be performed by Hub internals.
Args
key
:str
- The internal identifier for this tensor.
provider
:StorageProvider
- The storage provider for the parent dataset.
tensor_slice
:slice
- The slice object restricting the view of this tensor.
Expand source code
class Tensor:
    """View over one tensor held by a storage provider.

    A ``Tensor`` stores the tensor's key, the provider it is read from and
    written to, and a slice restricting which samples of the primary axis
    this view covers. Indexing returns a new, narrower ``Tensor`` view;
    the data itself is only read by :meth:`numpy`.
    """

    def __init__(
        self,
        key: str,
        provider: StorageProvider,
        tensor_slice: slice = slice(None),
    ):
        """Initialize a new tensor.

        Note:
            This operation does not create a new tensor in the storage provider,
            and should normally only be performed by Hub internals.

        Args:
            key (str): The internal identifier for this tensor.
            provider (StorageProvider): The storage provider for the parent dataset.
            tensor_slice (slice): The slice object restricting the view of this tensor.
        """
        self.key = key
        self.provider = provider
        self.slice = tensor_slice

        self.load_meta()

    def load_meta(self):
        """Read the tensor metadata and cache ``num_samples`` and ``shape``."""
        meta = read_tensor_meta(self.key, self.provider)
        self.num_samples = meta["length"]
        self.shape = meta["max_shape"]

    def __len__(self):
        """Return the number of samples along the primary axis of this view.

        For an unsliced tensor this is ``num_samples``; for a sliced view it
        is the number of positions ``self.slice`` selects out of
        ``num_samples``.
        """
        # slice.indices clamps the slice to the stored length, so the
        # resulting range has exactly as many items as the view selects.
        return len(range(*self.slice.indices(self.num_samples)))

    @staticmethod
    def _as_slice(item: Union[int, slice]) -> slice:
        """Normalize an index into a slice selecting the same samples.

        Args:
            item: An integer (Python or NumPy) or a slice.

        Returns:
            ``item`` itself when it is a slice, otherwise a one-sample slice.

        Raises:
            TypeError: If ``item`` is neither an integer nor a slice.
        """
        if isinstance(item, slice):
            return item
        if isinstance(item, (int, np.integer)):
            index = int(item)
            # index + 1 is 0 when index == -1, and slice(-1, 0) selects
            # nothing; `or None` leaves the stop open so the last sample
            # is selected instead.
            return slice(index, index + 1 or None)
        raise TypeError(
            f"Tensor indices must be integers or slices, not {type(item).__name__}"
        )

    def __getitem__(self, item: Union[int, slice]):
        """Return a new ``Tensor`` view narrowed by ``item``.

        No data is read; the returned view shares this tensor's key and
        provider.

        Raises:
            TypeError: If ``item`` is neither an integer nor a slice.
        """
        # NOTE(review): assumes merge_slices(existing, new) applies `new`
        # relative to `existing` -- confirm against hub.util.slice.
        new_slice = merge_slices(self.slice, self._as_slice(item))
        return Tensor(self.key, self.provider, new_slice)

    def __setitem__(self, item: Union[int, slice], value: np.ndarray):
        """Write ``value`` as the contents of this tensor.

        Only assignment to the whole tensor (``tensor[:] = value`` on an
        unsliced tensor) is supported.

        Raises:
            NotImplementedError: If the target is a proper slice of the tensor.
            TypeError: If ``item`` is neither an integer nor a slice.
        """
        # Merge the slices directly rather than building a throwaway Tensor,
        # whose constructor would read the metadata from storage.
        target_slice = merge_slices(self.slice, self._as_slice(item))
        if target_slice != slice(None):
            raise NotImplementedError(
                "Assignment to Tensor slices not currently supported!"
            )

        write_array(
            array=value,
            key=self.key,
            storage=self.provider,
            batched=True,
        )
        # The write may change the stored length/shape; refresh the cache.
        self.load_meta()

    def __iter__(self):
        """Yield a one-sample ``Tensor`` view for each position in this view."""
        for i in range(len(self)):
            yield self[i]

    def numpy(self):
        """Compute the contents of this tensor in numpy format.

        Returns:
            A numpy array containing the data represented by this tensor.
        """
        return read_array(self.key, self.provider, self.slice)
Methods
def load_meta(self)
-
Expand source code
def load_meta(self):
    """Read this tensor's metadata and cache its length and shape.

    Sets ``self.num_samples`` from the metadata's ``"length"`` entry and
    ``self.shape`` from its ``"max_shape"`` entry.
    """
    tensor_meta = read_tensor_meta(self.key, self.provider)

    # Keep this assignment order: num_samples first, then shape.
    self.num_samples = tensor_meta["length"]
    self.shape = tensor_meta["max_shape"]
def numpy(self)
-
Compute the contents of this tensor in numpy format.
Returns
A numpy array containing the data represented by this tensor.
Expand source code
def numpy(self):
    """Materialize the data behind this tensor view as a numpy array.

    Returns:
        A numpy array containing the data represented by this tensor.
    """
    # The read is restricted to this view's slice.
    contents = read_array(self.key, self.provider, self.slice)
    return contents