Upload folder using huggingface_hub
Browse files
README.md
CHANGED
|
@@ -19,7 +19,7 @@ Note: This model is in BF16; quantized MXFP4 FFN is not used.
|
|
| 19 |
- vLLM
|
| 20 |
|
| 21 |
```bash
|
| 22 |
-
vllm serve yujiepan/gpt-oss-bf16-tiny-random
|
| 23 |
```
|
| 24 |
|
| 25 |
- Transformers
|
|
@@ -28,7 +28,7 @@ vllm serve yujiepan/gpt-oss-bf16-tiny-random
|
|
| 28 |
import torch
|
| 29 |
from transformers import pipeline
|
| 30 |
|
| 31 |
-
model_id = "yujiepan/gpt-oss-bf16-tiny-random"
|
| 32 |
|
| 33 |
pipe = pipeline(
|
| 34 |
"text-generation",
|
|
@@ -67,7 +67,7 @@ from transformers import (
|
|
| 67 |
)
|
| 68 |
|
| 69 |
source_model_id = "openai/gpt-oss-120b"
|
| 70 |
-
save_folder = "/tmp/yujiepan/gpt-oss-bf16-tiny-random"
|
| 71 |
|
| 72 |
processor = AutoProcessor.from_pretrained(source_model_id)
|
| 73 |
processor.save_pretrained(save_folder)
|
|
@@ -106,6 +106,7 @@ with torch.no_grad():
|
|
| 106 |
model.save_pretrained(save_folder)
|
| 107 |
|
| 108 |
# mxfp4
|
|
|
|
| 109 |
# model = AutoModelForCausalLM.from_pretrained(save_folder, trust_remote_code=True, torch_dtype=torch.bfloat16, quantization_config=quantization_config)
|
| 110 |
# model.save_pretrained(save_folder, safe_serialization=True)
|
| 111 |
```
|
|
|
|
| 19 |
- vLLM
|
| 20 |
|
| 21 |
```bash
|
| 22 |
+
vllm serve yujiepan/gpt-oss-tiny-random-bf16
|
| 23 |
```
|
| 24 |
|
| 25 |
- Transformers
|
|
|
|
| 28 |
import torch
|
| 29 |
from transformers import pipeline
|
| 30 |
|
| 31 |
+
model_id = "yujiepan/gpt-oss-tiny-random-bf16"
|
| 32 |
|
| 33 |
pipe = pipeline(
|
| 34 |
"text-generation",
|
|
|
|
| 67 |
)
|
| 68 |
|
| 69 |
source_model_id = "openai/gpt-oss-120b"
|
| 70 |
+
save_folder = "/tmp/yujiepan/gpt-oss-tiny-random-bf16"
|
| 71 |
|
| 72 |
processor = AutoProcessor.from_pretrained(source_model_id)
|
| 73 |
processor.save_pretrained(save_folder)
|
|
|
|
| 106 |
model.save_pretrained(save_folder)
|
| 107 |
|
| 108 |
# mxfp4
|
| 109 |
+
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
|
| 110 |
# model = AutoModelForCausalLM.from_pretrained(save_folder, trust_remote_code=True, torch_dtype=torch.bfloat16, quantization_config=quantization_config)
|
| 111 |
# model.save_pretrained(save_folder, safe_serialization=True)
|
| 112 |
```
|