astegaras committed on
Commit
442c9db
·
verified ·
1 Parent(s): 7e05dd1

updated app again

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -12,15 +12,22 @@ llm = Llama(
12
  model_path=model_path,
13
  n_ctx=2048,
14
  n_gpu_layers=0,
 
15
  )
16
 
17
def respond(prompt):
    """Generate a completion for *prompt* with the module-level llama.cpp model.

    NOTE(review): `prompt` is forwarded verbatim — no "Q:/A:" formatting is
    applied here; presumably callers pass raw prompt text. Verify against the
    model's training format.
    """
    # Plain text completion (no chat template) on the shared `llm` instance.
    out = llm.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
    )
    # llama-cpp-python returns an OpenAI-style dict; take the first choice's text.
    return out["choices"][0]["text"]

# Minimal Gradio UI: one text box in, one text box out; blocks on launch().
gr.Interface(fn=respond, inputs="text", outputs="text").launch()
 
 
12
  model_path=model_path,
13
  n_ctx=2048,
14
  n_gpu_layers=0,
15
+ chat_format=None, # <-- CRITICAL: disable chat templates
16
  )
17
 
18
def respond(user_question):
    """Answer *user_question* using the module-level llama.cpp model.

    The prompt is formatted as ``Q: ...\\nA:`` to match the fine-tuning data
    (chat templates are disabled on the model), and generation is cut off
    before the model starts inventing the next "Q:" turn.

    Parameters:
        user_question: raw question text from the Gradio textbox.

    Returns:
        The generated answer text (first completion choice).
    """
    # Format the prompt exactly like the training data.
    prompt = f"Q: {user_question}\nA:"

    out = llm.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        # FIX: without a stop sequence, a plain completion model keeps
        # generating past the answer and emits further "Q:/A:" pairs.
        stop=["\nQ:"],
    )

    # Return only the generated answer (first choice's text).
    return out["choices"][0]["text"]

# Minimal Gradio UI: one text box in, one text box out; blocks on launch().
gr.Interface(fn=respond, inputs="text", outputs="text").launch()
33
+