Update app.py
app.py CHANGED
@@ -45,9 +45,9 @@ model = Llama(
     n_parts=1,
     #n_batch=100,
     logits_all=True,
-
+    n_threads=12,
     verbose=True,
-
+    n_gpu_layers=35,
     n_gqa=8 #must be set for 70b models
 )

@@ -129,12 +129,12 @@ def generate_search_request():
     parameters = data.get("parameters", {})

     # Extract parameters from the request
-    temperature = 0.01
+    temperature = parameters.get("temperature", 0.01)
     truncate = parameters.get("truncate", 1000)
     max_new_tokens = parameters.get("max_new_tokens", 1024)
-    top_p = 0.
+    top_p = parameters.get("top_p", 0.85)
     repetition_penalty = parameters.get("repetition_penalty", 1.2)
-    top_k =
+    top_k = parameters.get("top_k", 30)
     return_full_text = parameters.get("return_full_text", False)

@@ -168,12 +168,12 @@ def generate_response():
     parameters = data.get("parameters", {})

     # Extract parameters from the request
-    temperature =
+    temperature = parameters.get("temperature", 0.01)
     truncate = parameters.get("truncate", 1000)
     max_new_tokens = parameters.get("max_new_tokens", 1024)
-    top_p =
+    top_p = parameters.get("top_p", 0.85)
     repetition_penalty = parameters.get("repetition_penalty", 1.2)
-    top_k =
+    top_k = parameters.get("top_k", 30)
     return_full_text = parameters.get("return_full_text", False)
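
Taken together, the commit does two things: it gives the llama-cpp-python model explicit threading and GPU-offload settings (n_threads=12, n_gpu_layers=35), and it replaces hard-coded sampling values in both routes with per-request overrides read from the payload's "parameters" object, keeping the previous values as defaults. The sketch below shows that pattern in a self-contained form; the model path, route name, and "inputs" key are placeholders for illustration and are not taken from the Space's actual code.

from flask import Flask, jsonify, request
from llama_cpp import Llama

app = Flask(__name__)

# Model setup along the lines of the first hunk: n_threads sets the CPU
# thread count and n_gpu_layers offloads that many layers to the GPU.
# The diff also sets n_gqa=8 with a note that it must be set for 70B
# models; it is omitted from this sketch.
llm = Llama(
    model_path="model.gguf",  # placeholder path
    logits_all=True,
    n_threads=12,
    n_gpu_layers=35,
    verbose=True,
)

@app.route("/generate", methods=["POST"])  # placeholder route name
def generate():
    data = request.get_json(silent=True) or {}
    parameters = data.get("parameters", {})

    # Per-request overrides, falling back to the same defaults as the diff.
    temperature = parameters.get("temperature", 0.01)
    top_p = parameters.get("top_p", 0.85)
    top_k = parameters.get("top_k", 30)
    max_new_tokens = parameters.get("max_new_tokens", 1024)
    repetition_penalty = parameters.get("repetition_penalty", 1.2)

    # llama-cpp-python uses repeat_penalty for what the payload calls
    # repetition_penalty.
    out = llm(
        data.get("inputs", ""),
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repetition_penalty,
    )
    return jsonify({"generated_text": out["choices"][0]["text"]})

A client can then override any single knob, for example by POSTing {"inputs": "...", "parameters": {"temperature": 0.7}}, while a request that sends no parameters behaves exactly as the hard-coded version did.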