Spaces:
Running
on
Zero
Running
on
Zero
| """Hdf5 data backend. | |
| This backend works with filepaths pointing to valid HDF5 files. We assume that | |
| the given HDF5 file contains the whole dataset associated to this backend. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from typing import Literal | |
| import numpy as np | |
| from vis4d.common.imports import H5PY_AVAILABLE | |
| from .base import DataBackend | |
| if H5PY_AVAILABLE: | |
| import h5py | |
| from h5py import File | |
| else: | |
| raise ImportError("Please install h5py to enable HDF5Backend.") | |
class HDF5Backend(DataBackend):
    """Backend for loading data from HDF5 files.

    This backend works with filepaths pointing to valid HDF5 files. We assume
    that the given HDF5 file contains the whole dataset associated to this
    backend.

    You can use the provided script at vis4d/data/datasets/to_hdf5.py to
    convert your dataset to the expected hdf5 format before using this backend.

    Opened files are cached per path in ``db_cache`` and stay open until
    ``close`` is called.
    """

    def __init__(self) -> None:
        """Creates an instance of the class.

        Raises:
            ImportError: If h5py is not installed.
        """
        super().__init__()
        if not H5PY_AVAILABLE:
            raise ImportError("Please install h5py to enable HDF5Backend.")
        # Maps an HDF5 file path to (open handle, mode it was opened with).
        # The mode is kept so the handle can be reopened when a different
        # mode is requested for the same file.
        self.db_cache: dict[str, tuple[File, str]] = {}

    @staticmethod
    def _get_hdf5_path(
        filepath: str, allow_omitted_ext: bool = True
    ) -> tuple[str, list[str]]:
        """Get .hdf5 path and keys from filepath.

        The path is consumed from the right, one component at a time, until
        the remaining prefix ends with '.hdf5' (or, optionally, until
        appending '.hdf5' to it yields a valid HDF5 file). The removed
        components are the keys, collected innermost first.

        Args:
            filepath (str): The filepath to retrieve the data from.
                Should have the following format: 'path/to/file.hdf5/key1/key2'
            allow_omitted_ext (bool, optional): Whether to allow omitted
                extension, in which case the backend will try to append
                '.hdf5' to the filepath. Defaults to True.

        Returns:
            tuple[str, list[str]]: The .hdf5 path and the keys to retrieve.
                The keys are in reverse order (innermost key first). The path
                is the empty string if no HDF5 file was found in filepath.

        Examples:
            >>> HDF5Backend._get_hdf5_path("path/to/file.hdf5/key1/key2")
            ("path/to/file.hdf5", ["key2", "key1"])
            >>> HDF5Backend._get_hdf5_path("path/to/file/key1/key2", True)
            ("path/to/file.hdf5", ["key2", "key1"])  # if file.hdf5 exists and
                                                     # is a valid hdf5 file
        """
        filepath_as_list = filepath.split("/")
        keys: list[str] = []
        while True:
            # Stop once the HDF5 file is reached or the path is exhausted.
            if filepath.endswith(".hdf5") or filepath == "":
                break
            if allow_omitted_ext and h5py.is_hdf5(filepath + ".hdf5"):
                filepath = filepath + ".hdf5"
                break
            keys.append(filepath_as_list.pop())
            filepath = "/".join(filepath_as_list)
        return filepath, keys

    def _lookup(
        self, hdf5_path: str, keys: list[str]
    ) -> h5py.Group | h5py.Dataset | None:
        """Resolve keys inside an HDF5 file opened for reading.

        Args:
            hdf5_path (str): Path to the HDF5 file.
            keys (list[str]): Keys as returned by ``_get_hdf5_path``, i.e.
                innermost key first. The list is not modified.

        Returns:
            The object found by following the keys from the file root (the
            file itself if keys is empty), or None if any key is missing.
        """
        node = self._get_client(hdf5_path, "r")
        # Keys are stored innermost first, so walk them in reverse.
        for key in reversed(keys):
            node = node.get(key)
            if node is None:
                return None
        return node

    def exists(self, filepath: str) -> bool:
        """Check if filepath exists.

        Args:
            filepath (str): Path to file.

        Returns:
            bool: True if file exists, False otherwise.
        """
        hdf5_path, keys = self._get_hdf5_path(filepath)
        if not os.path.exists(hdf5_path):
            return False
        return self._lookup(hdf5_path, keys) is not None

    def set(
        self, filepath: str, content: bytes, mode: Literal["w", "a"] = "a"
    ) -> None:
        """Set the file content.

        The content is stored as a uint8 dataset. Missing parent groups are
        created. If filepath names only the HDF5 file and no key inside it,
        the file is opened but nothing is written.

        Args:
            filepath: path/to/file.hdf5/key1/key2/key3
            content: Bytes to be written to entry key3 within group key2
                within another group key1, for example.
            mode: "w" to overwrite the file, "a" to append to it.

        Raises:
            ValueError: If filepath is not a valid .hdf5 file
        """
        if ".hdf5" not in filepath:
            raise ValueError(f"{filepath} not a valid .hdf5 filepath!")
        hdf5_path, keys_str = filepath.split(".hdf5")
        key_list = keys_str.split("/")
        file = self._get_client(hdf5_path + ".hdf5", mode)
        if len(key_list) > 1:
            group_str = "/".join(key_list[:-1])
            if group_str == "":
                # The key sits directly below the file root.
                group = file
            else:
                # Indexing with file[group_str] would raise KeyError for a
                # group that does not exist yet (e.g. on a fresh file), so
                # create the parent group(s) on demand.
                group = file.require_group(group_str)
            key = key_list[-1]
            group.create_dataset(
                key, data=np.frombuffer(content, dtype="uint8")
            )

    def _get_client(self, hdf5_path: str, mode: str) -> File:
        """Get HDF5 client from path.

        Handles are cached per path. If the cached handle was opened with a
        different mode, it is closed and the file is reopened with the
        requested mode.

        Args:
            hdf5_path (str): Path to HDF5 file.
            mode (str): Mode to open the file in.

        Returns:
            File: the hdf5 file.
        """
        cached = self.db_cache.get(hdf5_path)
        if cached is not None:
            client, current_mode = cached
            if current_mode == mode:
                return client
            # Mode changed: the old handle must be released before reopening.
            client.close()
        client = File(hdf5_path, mode, swmr=True, libver="latest")
        self.db_cache[hdf5_path] = (client, mode)
        return client

    def get(self, filepath: str) -> bytes:
        """Get values according to the filepath as bytes.

        Args:
            filepath (str): The path to the file. It consists of an HDF5 path
                together with the relative path inside it, e.g.: "/path/to/
                file.hdf5/key/subkey/data". If no .hdf5 given inside filepath,
                the function will search for the first .hdf5 file present in
                the path, i.e. "/path/to/file/key/subkey/data" will also
                retrieve /key/subkey/data from /path/to/file.hdf5.

        Raises:
            FileNotFoundError: If no suitable file exists.
            ValueError: If key not found inside hdf5 file.

        Returns:
            bytes: The file content in bytes
        """
        hdf5_path, keys = self._get_hdf5_path(filepath)
        if not os.path.exists(hdf5_path):
            raise FileNotFoundError(
                f"Corresponding HDF5 file not found: {filepath}"
            )
        value_buf = self._lookup(hdf5_path, keys)
        if value_buf is None:
            url = "/".join(reversed(keys))
            raise ValueError(f"Value {url} not found in {hdf5_path}!")
        # Indexing with an empty tuple reads the whole dataset.
        return bytes(value_buf[()])

    def isfile(self, filepath: str) -> bool:
        """Check if filepath is a file.

        Anything inside the HDF5 file that is not a group counts as a file.

        Args:
            filepath (str): Path to file.

        Raises:
            FileNotFoundError: If no suitable file exists.
            ValueError: If key not found inside hdf5 file.

        Returns:
            bool: True if filepath points to a non-group entry, False
                otherwise.
        """
        hdf5_path, keys = self._get_hdf5_path(filepath)
        if not os.path.exists(hdf5_path):
            raise FileNotFoundError(
                f"Corresponding HDF5 file not found: {filepath}"
            )
        value_buf = self._lookup(hdf5_path, keys)
        if value_buf is None:
            url = "/".join(reversed(keys))
            raise ValueError(f"Value {url} not found in {hdf5_path}!")
        return not isinstance(value_buf, h5py.Group)

    def listdir(self, filepath: str) -> list[str]:
        """List all files in the given directory.

        Args:
            filepath (str): Path to directory.

        Raises:
            FileNotFoundError: If no suitable file exists.
            ValueError: If key not found inside hdf5 file, or if it does not
                refer to a group.

        Returns:
            list[str]: Sorted list of entries in the given directory.
        """
        hdf5_path, keys = self._get_hdf5_path(filepath)
        if not os.path.exists(hdf5_path):
            raise FileNotFoundError(
                f"Corresponding HDF5 file not found: {filepath}"
            )
        url = "/".join(reversed(keys))
        value_buf = self._lookup(hdf5_path, keys)
        if value_buf is None:
            raise ValueError(f"Value {url} not found in {hdf5_path}!")
        if not isinstance(value_buf, h5py.Group):
            raise ValueError(f"Value {url} is not a group in {hdf5_path}!")
        return sorted(value_buf.keys())

    def close(self) -> None:
        """Close all opened HDF5 files."""
        for client, _ in self.db_cache.values():
            client.close()
        self.db_cache.clear()