kunhaokhliu committed on
Commit
8222adf
·
1 Parent(s): 51bf1d8
Files changed (2) hide show
  1. app.py +5 -0
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import os
2
  import argparse
3
  import time
@@ -16,6 +20,7 @@ from huggingface_hub import snapshot_download, hf_hub_download
16
  # -----------------------------
17
  # Globals (loaded once per process)
18
  # -----------------------------
 
19
  _PIPELINE: Optional[torch.nn.Module] = None
20
  _DEVICE: Optional[torch.device] = None
21
 
 
1
+
2
+ import subprocess
3
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
4
+
5
  import os
6
  import argparse
7
  import time
 
20
  # -----------------------------
21
  # Globals (loaded once per process)
22
  # -----------------------------
23
+
24
  _PIPELINE: Optional[torch.nn.Module] = None
25
  _DEVICE: Optional[torch.device] = None
26
 
requirements.txt CHANGED
@@ -39,5 +39,4 @@ flask-socketio
39
  torchao
40
  tensorboard
41
  ninja
42
- packaging
43
- https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.5cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 
39
  torchao
40
  tensorboard
41
  ninja
42
+ packaging