astegaras committed on
Commit
af8d9d1
·
verified ·
1 Parent(s): 529b49e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -8,12 +8,16 @@ model_path = hf_hub_download(
8
  filename="llama-3.2-3b-instruct.Q2_K.gguf"
9
  )
10
 
11
- # Load the GGUF model with llama.cpp
12
  llm = Llama(
13
  model_path=model_path,
14
- n_ctx=4096, # Context window for inference
15
- n_threads=8, # Adjust to HF hardware
16
- n_batch=512,
 
 
 
 
17
  verbose=False
18
  )
19
 
 
8
  filename="llama-3.2-3b-instruct.Q2_K.gguf"
9
  )
10
 
11
+ # Load GGUF with safe HF settings
12
  llm = Llama(
13
  model_path=model_path,
14
+ n_ctx=4096,
15
+ n_threads=4,
16
+ n_batch=64,
17
+ n_gpu_layers=0, # IMPORTANT
18
+ use_mmap=False, # IMPORTANT
19
+ use_mlock=False, # IMPORTANT
20
+ low_vram=True, # IMPORTANT
21
  verbose=False
22
  )
23