Update README.md
Browse files
README.md
CHANGED
|
@@ -59,7 +59,7 @@ python3 -m vllm.entrypoints.openai.api_server \
|
|
| 59 |
--dtype auto \
|
| 60 |
--api-key token-abc123 \
|
| 61 |
--quantization compressed-tensors \
|
| 62 |
-
--max-num-batched-tokens
|
| 63 |
-
--max-model-len
|
| 64 |
--tensor-parallel-size 2 \
|
| 65 |
--gpu-memory-utilization 0.99
|
|
|
|
| 59 |
--dtype auto \
|
| 60 |
--api-key token-abc123 \
|
| 61 |
--quantization compressed-tensors \
|
| 62 |
+
--max-num-batched-tokens 16384 \
|
| 63 |
+
--max-model-len 16384 \
|
| 64 |
--tensor-parallel-size 2 \
|
| 65 |
--gpu-memory-utilization 0.99
|