Spaces:
Runtime error
Runtime error
| from pathlib import Path | |
| from subprocess import run | |
| from typing import Generator | |
# Location of the bloomz.cpp checkout: a "bloomz.cpp" directory that sits next
# to this module, resolved from the module's own path rather than the CWD.
BLOOMZ_FOLDER = Path(__file__).with_name("bloomz.cpp")
def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
    """Convert a model to ggml and optionally quantize it, reporting progress.

    This is a generator: it yields human-readable progress messages while it
    works and *returns* the path of the final model file, which the consumer
    receives as ``StopIteration.value`` (or as the result of ``yield from``).

    Args:
        cache_folder: Directory passed to the conversion script; the produced
            ``.bin`` files are expected to appear directly inside it.
        model_id: Model identifier forwarded to the conversion script, e.g.
            ``"org/name"``. Only the part after the last ``/`` is used to
            build the expected output file names.
        precision: ``"FP32"`` adds ``--use-fp32`` to the conversion command
            and expects an ``f32`` file; any other value expects ``f16``.
        quantization: When true, run the ``quantize`` binary on the converted
            model, delete the non-quantized file and return the quantized one.

    Yields:
        Progress messages: each command before it is run, and a confirmation
        after each step succeeds.

    Returns:
        Path of the resulting model file (quantized if ``quantization``).

    Raises:
        subprocess.CalledProcessError: If a subprocess exits with a non-zero
            status (both are run with ``check=True``).
        AssertionError: If a step finishes but its expected output file does
            not exist. Raised explicitly instead of via ``assert`` so the
            check is not stripped under ``python -O``.
    """
    use_fp32 = precision == "FP32"

    # Conversion
    cmd = [
        "python",
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if use_fp32:
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if use_fp32 else "f16"
    # Use the last path component so ids without an "org/" prefix do not blow
    # up on tuple unpacking after the conversion has already been paid for.
    model_name = model_id.rpartition("/")[2]
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    if not model_path.is_file():
        raise AssertionError(
            f"Conversion did not produce the expected file: {model_path}"
        )
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        cmd = [
            # Resolve the binary next to this module, like the conversion
            # script above, so the call does not depend on the current
            # working directory.
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            # Quantization type argument for the binary; presumably selects
            # q4_0 to match the file name above - confirm against bloomz.cpp.
            "2",
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        if not q_model_path.is_file():
            raise AssertionError(
                f"Quantization did not produce the expected file: {q_model_path}"
            )
        # Delete non-quantized file
        model_path.unlink(missing_ok=True)
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return
    return model_path