Update app.py
app.py CHANGED

```diff
@@ -207,7 +207,7 @@ def quantize(model_path, repo_id, quant_method=None):
 
     return final_path
 
-def create_readme(repo_name, base_model_name, lora_model_name, quant_methods):
+def create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username):
     readme_path = os.path.join("output", repo_name, "README.md")
     readme_template = """---
 tags:
@@ -235,13 +235,41 @@ datasets:
 - quant_methods: {quant_methods}
 - created_at: {created_at}
 - created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
+
+## Usage:
+```python
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_path = "{username}/{repo_name}"
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    device_map="auto",
+    torch_dtype='auto'
+).eval()
+
+# Prompt content: "hi"
+messages = [
+    {{"role": "user", "content": "hi"}}
+]
+
+input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
+output_ids = model.generate(input_ids.to('cuda'))
+response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+
+# Model response: "Hello! How can I assist you today?"
+print(response)
+```
 """.format(
     quantization="\n- quantization" if len(quant_methods) > 0 else "",
     base_model_name=base_model_name,
     lora_model_name=lora_model_name,
     repo_name=repo_name,
     quant_methods=quant_methods,
-    created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+    created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
+    username=username
 )
 
     with open(readme_path, "w") as f:
@@ -284,7 +312,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
     for quant_method in quant_methods:
         quantize(output_dir, repo_name, quant_method=quant_method)
 
-    create_readme(repo_name, base_model_name, lora_model_name, quant_methods)
+    create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username)
 
     # Upload the merged and quantized models
     api.upload_large_folder(
```
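The `messages` example added to the README template lives inside a string that is later run through `str.format`, so its literal braces have to be doubled (`{{` / `}}`): with single braces, `str.format` parses `{"role": ...}` as a replacement field and raises a `KeyError`. A minimal sketch of the escaping rule:

```python
# Literal braces in a str.format() template are written {{ and }};
# single braces mark replacement fields such as {username}.
template = 'messages = [{{"role": "user", "content": "hi"}}]  # repo: {username}'
print(template.format(username="demo-user"))
# -> messages = [{"role": "user", "content": "hi"}]  # repo: demo-user
```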
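The last hunk cuts off at the opening of `api.upload_large_folder(`, so its arguments stay unknown. For orientation, `huggingface_hub.HfApi.upload_large_folder` takes a target repo and a local folder; a sketch under assumed values (the identifiers below are hypothetical, only the parameter names come from the library):

```python
import os
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # placeholder token
username, repo_name = "demo-user", "my-merged-model"  # hypothetical values

api.upload_large_folder(
    repo_id=f"{username}/{repo_name}",
    folder_path=os.path.join("output", repo_name),  # matches the "output/<repo>" layout used by create_readme
    repo_type="model",  # "model" assumed; the diff truncates the real arguments
)
```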