Update app.py
app.py CHANGED

```diff
@@ -207,7 +207,7 @@ def quantize(model_path, repo_id, quant_method=None):
 
     return final_path
 
-def create_readme(repo_name, base_model_name, lora_model_name, quant_methods):
+def create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username):
     readme_path = os.path.join("output", repo_name, "README.md")
     readme_template = """---
 tags:
@@ -235,13 +235,41 @@ datasets:
 - quant_methods: {quant_methods}
 - created_at: {created_at}
 - created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
+
+## Usage:
+```python
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_path = "{username}/{repo_name}"
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    device_map="auto",
+    torch_dtype='auto'
+).eval()
+
+# Prompt content: "hi"
+messages = [
+    {{"role": "user", "content": "hi"}}
+]
+
+input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
+output_ids = model.generate(input_ids.to('cuda'))
+response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+
+# Model response: "Hello! How can I assist you today?"
+print(response)
+```
 """.format(
     quantization="\n- quantization" if len(quant_methods) > 0 else "",
     base_model_name=base_model_name,
     lora_model_name=lora_model_name,
     repo_name=repo_name,
     quant_methods=quant_methods,
-    created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+    created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
+    username=username
 )
 
     with open(readme_path, "w") as f:
@@ -284,7 +312,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
     for quant_method in quant_methods:
         quantize(output_dir, repo_name, quant_method=quant_method)
 
-    create_readme(repo_name, base_model_name, lora_model_name, quant_methods)
+    create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username)
 
     # Upload the merged and quantized models
     api.upload_large_folder(
```
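The `messages` example added to the README template lives inside a string that is later run through `str.format`, so its literal braces have to be doubled (`{{` / `}}`): with single braces, `str.format` parses `{"role": ...}` as a replacement field and raises a `KeyError`. A minimal sketch of the escaping rule:

```python
# Literal braces in a str.format() template are written {{ and }};
# single braces mark replacement fields such as {username}.
template = 'messages = [{{"role": "user", "content": "hi"}}]  # repo: {username}'
print(template.format(username="demo-user"))
# -> messages = [{"role": "user", "content": "hi"}]  # repo: demo-user
```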
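The last hunk cuts off at the opening of `api.upload_large_folder(`, so its arguments stay unknown. For orientation, `huggingface_hub.HfApi.upload_large_folder` takes a target repo and a local folder; a sketch under assumed values (the identifiers below are hypothetical, only the parameter names come from the library):

```python
import os
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # placeholder token
username, repo_name = "demo-user", "my-merged-model"  # hypothetical values

api.upload_large_folder(
    repo_id=f"{username}/{repo_name}",
    folder_path=os.path.join("output", repo_name),  # matches the "output/<repo>" layout used by create_readme
    repo_type="model",  # "model" assumed; the diff truncates the real arguments
)
```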