Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,9 +9,9 @@ import os
|
|
| 9 |
|
| 10 |
import subprocess
|
| 11 |
subprocess.run('pip install flash-attn==2.7.4.post1 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 12 |
-
os.system("pip install transformers")
|
| 13 |
os.system("pip install numpy==1.26.4")
|
| 14 |
-
os.system("pip install vllm")
|
| 15 |
import sys
|
| 16 |
import threading
|
| 17 |
import re
|
|
@@ -36,7 +36,7 @@ def parse_args():
|
|
| 36 |
parser.add_argument(
|
| 37 |
"--backend",
|
| 38 |
type=str,
|
| 39 |
-
default="
|
| 40 |
choices=["transformers", "vllm"],
|
| 41 |
help="Backend to use for inference",
|
| 42 |
)
|
|
|
|
| 9 |
|
| 10 |
import subprocess
|
| 11 |
subprocess.run('pip install flash-attn==2.7.4.post1 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 12 |
+
os.system("pip install transformers==4.51.1")
|
| 13 |
os.system("pip install numpy==1.26.4")
|
| 14 |
+
os.system("pip install vllm==0.8.2")
|
| 15 |
import sys
|
| 16 |
import threading
|
| 17 |
import re
|
|
|
|
| 36 |
parser.add_argument(
|
| 37 |
"--backend",
|
| 38 |
type=str,
|
| 39 |
+
default="transformers",
|
| 40 |
choices=["transformers", "vllm"],
|
| 41 |
help="Backend to use for inference",
|
| 42 |
)
|