Module hub.core.storage.tests.test_benchmark_storage_provider

Expand source code
import pytest

from hub.constants import GB
from hub.tests.common_benchmark import (
    parametrize_benchmark_chunk_sizes,
    BENCHMARK_CHUNK_SIZES,
)
from hub.core.tests.common import parametrize_all_caches, parametrize_all_storages
from hub.core.storage.tests.test_storage_provider import KEY  # type: ignore


SIMULATED_DATA_SIZES = [1 * GB]

# caclulate the number of chunks needed for each entry in `SIMULATED_DATA_SIZES`
NUM_CHUNKS = []
for chunk_size in BENCHMARK_CHUNK_SIZES:
    for data_size in SIMULATED_DATA_SIZES:
        NUM_CHUNKS.append(data_size // chunk_size)


mark_cache_group = pytest.mark.benchmark(group="storage_with_caches")
mark_no_cache_group = pytest.mark.benchmark(group="storage_without_caches")

parametrize_benchmark_num_chunks = pytest.mark.parametrize("num_chunks", NUM_CHUNKS)


def write_to_files(storage, chunk_size, num_chunks):
    chunk = b"1" * chunk_size
    for i in range(num_chunks):
        storage[f"{KEY}_{i}"] = chunk
    storage.flush()


def read_from_files(storage, num_chunks):
    for i in range(num_chunks):
        storage[f"{KEY}_{i}"]


@mark_no_cache_group
@parametrize_all_storages
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_storage_write_speeds(benchmark, storage, chunk_size, num_chunks):
    benchmark(write_to_files, storage, chunk_size, num_chunks)


@mark_cache_group
@parametrize_all_caches
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_cache_write_speeds(benchmark, storage, chunk_size, num_chunks):
    benchmark(write_to_files, storage, chunk_size, num_chunks)


@mark_no_cache_group
@parametrize_all_storages
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_storage_read_speeds(benchmark, storage, chunk_size, num_chunks):
    write_to_files(storage, chunk_size, num_chunks)
    benchmark(read_from_files, storage, num_chunks)


@mark_cache_group
@parametrize_all_caches
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_cache_read_speeds(benchmark, storage, chunk_size, num_chunks):
    write_to_files(storage, chunk_size, num_chunks)
    benchmark(read_from_files, storage, num_chunks)


@mark_cache_group
@parametrize_all_caches
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_full_cache_read_speeds(benchmark, storage, chunk_size, num_chunks):
    write_to_files(storage, chunk_size, num_chunks)
    read_from_files(storage, num_chunks)
    benchmark(read_from_files, storage, num_chunks)

Functions

def read_from_files(storage, num_chunks)
Expand source code
def read_from_files(storage, num_chunks):
    for i in range(num_chunks):
        storage[f"{KEY}_{i}"]
def test_cache_read_speeds(benchmark, storage, chunk_size, num_chunks)
Expand source code
@mark_cache_group
@parametrize_all_caches
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_cache_read_speeds(benchmark, storage, chunk_size, num_chunks):
    write_to_files(storage, chunk_size, num_chunks)
    benchmark(read_from_files, storage, num_chunks)
def test_cache_write_speeds(benchmark, storage, chunk_size, num_chunks)
Expand source code
@mark_cache_group
@parametrize_all_caches
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_cache_write_speeds(benchmark, storage, chunk_size, num_chunks):
    benchmark(write_to_files, storage, chunk_size, num_chunks)
def test_full_cache_read_speeds(benchmark, storage, chunk_size, num_chunks)
Expand source code
@mark_cache_group
@parametrize_all_caches
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_full_cache_read_speeds(benchmark, storage, chunk_size, num_chunks):
    write_to_files(storage, chunk_size, num_chunks)
    read_from_files(storage, num_chunks)
    benchmark(read_from_files, storage, num_chunks)
def test_storage_read_speeds(benchmark, storage, chunk_size, num_chunks)
Expand source code
@mark_no_cache_group
@parametrize_all_storages
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_storage_read_speeds(benchmark, storage, chunk_size, num_chunks):
    write_to_files(storage, chunk_size, num_chunks)
    benchmark(read_from_files, storage, num_chunks)
def test_storage_write_speeds(benchmark, storage, chunk_size, num_chunks)
Expand source code
@mark_no_cache_group
@parametrize_all_storages
@parametrize_benchmark_chunk_sizes
@parametrize_benchmark_num_chunks
def test_storage_write_speeds(benchmark, storage, chunk_size, num_chunks):
    benchmark(write_to_files, storage, chunk_size, num_chunks)
def write_to_files(storage, chunk_size, num_chunks)
Expand source code
def write_to_files(storage, chunk_size, num_chunks):
    chunk = b"1" * chunk_size
    for i in range(num_chunks):
        storage[f"{KEY}_{i}"] = chunk
    storage.flush()