Spaces:

astegaras
/

iris

Sleeping

astegaras committed 14 days ago

Commit

af8d9d1

verified ·

1 Parent(s): 529b49e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,12 +8,16 @@ model_path = hf_hub_download(
     filename="llama-3.2-3b-instruct.Q2_K.gguf"
 )
-# Load the GGUF model with llama.cpp
 llm = Llama(
     model_path=model_path,
-    n_ctx=4096,       # Context window for inference
-    n_threads=8,      # Adjust to HF hardware
-    n_batch=512,
     verbose=False
 )

     filename="llama-3.2-3b-instruct.Q2_K.gguf"
 )
+# Load GGUF with safe HF settings
 llm = Llama(
     model_path=model_path,
+    n_ctx=4096,
+    n_threads=4,
+    n_batch=64,
+    n_gpu_layers=0,     # IMPORTANT
+    use_mmap=False,     # IMPORTANT
+    use_mlock=False,    # IMPORTANT
+    low_vram=True,      # IMPORTANT
     verbose=False
 )