Module hub.core.storage.provider
Expand source code
from collections.abc import MutableMapping
from typing import Optional
from abc import ABC, abstractmethod
from hub.util.assert_byte_indexes import assert_byte_indexes
from hub.constants import BYTE_PADDING
class StorageProvider(ABC, MutableMapping):
    """An abstract base class for implementing a storage provider.

    To add a new provider, create a subclass and implement all 5 abstract
    methods below: `__getitem__`, `__setitem__`, `__iter__`, `__delitem__`
    and `__len__`. `get_bytes` and `set_bytes` are implemented here on top of
    those methods; `flush` is a no-op unless a subclass overrides it.
    """

    @abstractmethod
    def __getitem__(self, path: str):
        """Gets the whole object present at the path.

        Args:
            path (str): The path relative to the root of the provider.

        Returns:
            bytes: The bytes of the object present at the path.

        Raises:
            KeyError: If an object is not found at the path.
        """

    def get_bytes(
        self,
        path: str,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None,
    ):
        """Gets the object present at the path within the given byte range.

        The whole object is read through `__getitem__` and then sliced.

        Args:
            path (str): The path relative to the root of the provider.
            start_byte (int, optional): If only specific bytes starting from start_byte are required.
            end_byte (int, optional): If only specific bytes up to end_byte are required.

        Returns:
            bytes: The bytes of the object present at the path within the given byte range.

        Raises:
            InvalidBytesRequestedError: If `start_byte` > `end_byte` or `start_byte` < 0 or `end_byte` < 0.
            KeyError: If an object is not found at the path.
        """
        assert_byte_indexes(start_byte, end_byte)
        return self[path][start_byte:end_byte]

    @abstractmethod
    def __setitem__(self, path: str, value: bytes):
        """Sets the object present at the path with the value.

        Args:
            path (str): the path relative to the root of the provider.
            value (bytes): the value to be assigned at the path.
        """

    def set_bytes(
        self,
        path: str,
        value: bytes,
        start_byte: Optional[int] = None,
        overwrite: Optional[bool] = False,
    ):
        """Writes the value into the object at the path, starting at `start_byte`.

        When an object already exists at the path and `overwrite` is falsy, the
        existing object is read, padded at the end with `BYTE_PADDING` if it is
        shorter than the written range, and the bytes in
        `[start_byte, start_byte + len(value))` are replaced by `value`.
        Otherwise the object is replaced by `value`, padded at the start with
        `BYTE_PADDING` so that `value` begins at `start_byte`.

        Args:
            path (str): the path relative to the root of the provider.
            value (bytes): the value to be assigned at the path.
            start_byte (int, optional): If only specific bytes starting from start_byte are to be assigned.
                `None` is treated as 0.
            overwrite (boolean, optional): If True, any object present at the path is completely
                overwritten, without fetching its data.

        Raises:
            InvalidBytesRequestedError: If `start_byte` < 0.
        """
        start_byte = start_byte or 0
        end_byte = start_byte + len(value)
        assert_byte_indexes(start_byte, end_byte)

        if not overwrite:
            # Read the existing object exactly once. The inherited
            # `Mapping.__contains__` is itself implemented with `__getitem__`,
            # so a separate `path in self` check would fetch the data an extra
            # time (and would fetch it even when `overwrite` is True).
            try:
                current_value = bytearray(self[path])
            except KeyError:
                pass  # nothing stored yet: fall through and create the object
            else:
                # need to pad at the end to write bytes past the current length
                if end_byte > len(current_value):
                    current_value = current_value.ljust(end_byte, BYTE_PADDING)
                current_value[start_byte:end_byte] = value
                self[path] = current_value
                return

        # need to pad at the start to write from an offset
        if start_byte != 0:
            value = value.rjust(end_byte, BYTE_PADDING)
        self[path] = value

    @abstractmethod
    def __iter__(self):
        """Generator function that iterates over the keys of the provider.

        Yields:
            str: the path of the object that it is iterating over, relative to the root of the provider.
        """

    @abstractmethod
    def __delitem__(self, path: str):
        """Delete the object present at the path.

        Args:
            path (str): the path to the object relative to the root of the provider.

        Raises:
            KeyError: If an object is not found at the path.
        """

    @abstractmethod
    def __len__(self):
        """Returns the number of files present inside the root of the provider.

        Returns:
            int: the number of files present inside the root.
        """

    def flush(self):
        """Only needs to be implemented for caches. Flushes the data to the next storage provider.

        Should be a no op for Base Storage Providers like local, s3, azure, gcs, etc.
        """
Classes
class StorageProvider
-
An abstract base class for implementing a storage provider.
To add a new provider, create a subclass of StorageProvider and implement all 5 abstract methods below.
Expand source code
class StorageProvider(ABC, MutableMapping):
    """An abstract base class for implementing a storage provider.

    To add a new provider, create a subclass and implement all 5 abstract
    methods below: `__getitem__`, `__setitem__`, `__iter__`, `__delitem__`
    and `__len__`. `get_bytes` and `set_bytes` are implemented here on top of
    those methods; `flush` is a no-op unless a subclass overrides it.
    """

    @abstractmethod
    def __getitem__(self, path: str):
        """Gets the whole object present at the path.

        Args:
            path (str): The path relative to the root of the provider.

        Returns:
            bytes: The bytes of the object present at the path.

        Raises:
            KeyError: If an object is not found at the path.
        """

    def get_bytes(
        self,
        path: str,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None,
    ):
        """Gets the object present at the path within the given byte range.

        The whole object is read through `__getitem__` and then sliced.

        Args:
            path (str): The path relative to the root of the provider.
            start_byte (int, optional): If only specific bytes starting from start_byte are required.
            end_byte (int, optional): If only specific bytes up to end_byte are required.

        Returns:
            bytes: The bytes of the object present at the path within the given byte range.

        Raises:
            InvalidBytesRequestedError: If `start_byte` > `end_byte` or `start_byte` < 0 or `end_byte` < 0.
            KeyError: If an object is not found at the path.
        """
        assert_byte_indexes(start_byte, end_byte)
        return self[path][start_byte:end_byte]

    @abstractmethod
    def __setitem__(self, path: str, value: bytes):
        """Sets the object present at the path with the value.

        Args:
            path (str): the path relative to the root of the provider.
            value (bytes): the value to be assigned at the path.
        """

    def set_bytes(
        self,
        path: str,
        value: bytes,
        start_byte: Optional[int] = None,
        overwrite: Optional[bool] = False,
    ):
        """Writes the value into the object at the path, starting at `start_byte`.

        When an object already exists at the path and `overwrite` is falsy, the
        existing object is read, padded at the end with `BYTE_PADDING` if it is
        shorter than the written range, and the bytes in
        `[start_byte, start_byte + len(value))` are replaced by `value`.
        Otherwise the object is replaced by `value`, padded at the start with
        `BYTE_PADDING` so that `value` begins at `start_byte`.

        Args:
            path (str): the path relative to the root of the provider.
            value (bytes): the value to be assigned at the path.
            start_byte (int, optional): If only specific bytes starting from start_byte are to be assigned.
                `None` is treated as 0.
            overwrite (boolean, optional): If True, any object present at the path is completely
                overwritten, without fetching its data.

        Raises:
            InvalidBytesRequestedError: If `start_byte` < 0.
        """
        start_byte = start_byte or 0
        end_byte = start_byte + len(value)
        assert_byte_indexes(start_byte, end_byte)

        if not overwrite:
            # Read the existing object exactly once. The inherited
            # `Mapping.__contains__` is itself implemented with `__getitem__`,
            # so a separate `path in self` check would fetch the data an extra
            # time (and would fetch it even when `overwrite` is True).
            try:
                current_value = bytearray(self[path])
            except KeyError:
                pass  # nothing stored yet: fall through and create the object
            else:
                # need to pad at the end to write bytes past the current length
                if end_byte > len(current_value):
                    current_value = current_value.ljust(end_byte, BYTE_PADDING)
                current_value[start_byte:end_byte] = value
                self[path] = current_value
                return

        # need to pad at the start to write from an offset
        if start_byte != 0:
            value = value.rjust(end_byte, BYTE_PADDING)
        self[path] = value

    @abstractmethod
    def __iter__(self):
        """Generator function that iterates over the keys of the provider.

        Yields:
            str: the path of the object that it is iterating over, relative to the root of the provider.
        """

    @abstractmethod
    def __delitem__(self, path: str):
        """Delete the object present at the path.

        Args:
            path (str): the path to the object relative to the root of the provider.

        Raises:
            KeyError: If an object is not found at the path.
        """

    @abstractmethod
    def __len__(self):
        """Returns the number of files present inside the root of the provider.

        Returns:
            int: the number of files present inside the root.
        """

    def flush(self):
        """Only needs to be implemented for caches. Flushes the data to the next storage provider.

        Should be a no op for Base Storage Providers like local, s3, azure, gcs, etc.
        """
Ancestors
- abc.ABC
- collections.abc.MutableMapping
- collections.abc.Mapping
- collections.abc.Collection
- collections.abc.Sized
- collections.abc.Iterable
- collections.abc.Container
Subclasses
Methods
def flush(self)
-
Only needs to be implemented for caches. Flushes the data to the next storage provider. Should be a no op for Base Storage Providers like local, s3, azure, gcs, etc.
Expand source code
def flush(self):
    """Push any pending data on to the next storage provider.

    Only caches need to implement this. Base storage providers such as
    local, s3, azure and gcs should leave it as a no-op.
    """
    return None
def get_bytes(self, path: str, start_byte: Union[int, NoneType] = None, end_byte: Union[int, NoneType] = None)
-
Gets the object present at the path within the given byte range.
Args
path
:str
- The path relative to the root of the provider.
start_byte
:int, optional
- If only specific bytes starting from start_byte are required.
end_byte
:int, optional
- If only specific bytes up to end_byte are required.
Returns
bytes
- The bytes of the object present at the path within the given byte range.
Raises
InvalidBytesRequestedError
- If start_byte > end_byte, or start_byte < 0, or end_byte < 0.
KeyError
- If an object is not found at the path.
Expand source code
def get_bytes(
    self,
    path: str,
    start_byte: Optional[int] = None,
    end_byte: Optional[int] = None,
):
    """Return the requested byte range of the object stored at the path.

    The byte indexes are validated first; the whole object is then read
    through `self[path]` and sliced.

    Args:
        path (str): The path relative to the root of the provider.
        start_byte (int, optional): First byte to return; `None` means from the beginning.
        end_byte (int, optional): Byte to stop before; `None` means up to the end.

    Returns:
        bytes: The bytes of the object present at the path within the given byte range.

    Raises:
        InvalidBytesRequestedError: If `start_byte` > `end_byte` or `start_byte` < 0 or `end_byte` < 0.
        KeyError: If an object is not found at the path.
    """
    # Validate before touching storage so that bad ranges never trigger a read.
    assert_byte_indexes(start_byte, end_byte)
    whole_object = self[path]
    requested_range = slice(start_byte, end_byte)
    return whole_object[requested_range]
def set_bytes(self, path: str, value: bytes, start_byte: Union[int, NoneType] = None, overwrite: Union[bool, NoneType] = False)
-
Sets the object present at the path with the value
Args
path
:str
- the path relative to the root of the provider.
value
:bytes
- the value to be assigned at the path.
start_byte
:int, optional
- If only specific bytes starting from start_byte are to be assigned.
overwrite
:boolean, optional
- If True, any object already present at the path is completely overwritten, without fetching its data.
Raises
InvalidBytesRequestedError
- If
start_byte
< 0.
Expand source code
def set_bytes(
    self,
    path: str,
    value: bytes,
    start_byte: Optional[int] = None,
    overwrite: Optional[bool] = False,
):
    """Writes the value into the object at the path, starting at `start_byte`.

    When an object already exists at the path and `overwrite` is falsy, the
    existing object is read, padded at the end with `BYTE_PADDING` if it is
    shorter than the written range, and the bytes in
    `[start_byte, start_byte + len(value))` are replaced by `value`.
    Otherwise the object is replaced by `value`, padded at the start with
    `BYTE_PADDING` so that `value` begins at `start_byte`.

    Args:
        path (str): the path relative to the root of the provider.
        value (bytes): the value to be assigned at the path.
        start_byte (int, optional): If only specific bytes starting from start_byte are to be assigned.
            `None` is treated as 0.
        overwrite (boolean, optional): If True, any object present at the path is completely
            overwritten, without fetching its data.

    Raises:
        InvalidBytesRequestedError: If `start_byte` < 0.
    """
    start_byte = start_byte or 0
    end_byte = start_byte + len(value)
    assert_byte_indexes(start_byte, end_byte)

    if not overwrite:
        # Read the existing object exactly once. A mapping's default
        # `__contains__` is itself implemented with `__getitem__`, so a
        # separate `path in self` check would fetch the data an extra time
        # (and would fetch it even when `overwrite` is True).
        try:
            current_value = bytearray(self[path])
        except KeyError:
            pass  # nothing stored yet: fall through and create the object
        else:
            # need to pad at the end to write bytes past the current length
            if end_byte > len(current_value):
                current_value = current_value.ljust(end_byte, BYTE_PADDING)
            current_value[start_byte:end_byte] = value
            self[path] = current_value
            return

    # need to pad at the start to write from an offset
    if start_byte != 0:
        value = value.rjust(end_byte, BYTE_PADDING)
    self[path] = value