thinkingnew committed
Commit 715110f · 1 Parent(s): be65975

Initial LLaMA API commit

Files changed (3):
  1. Dockerfile +21 -0
  2. app.py +26 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
+ # Use Python 3.10 as the base image (change to 3.9 if needed)
+ FROM python:3.10
+
+ # Create a non-root user and switch to it
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Copy and install dependencies first (helps with layer caching)
+ COPY --chown=user requirements.txt .
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ # Copy the rest of the files
+ COPY --chown=user . .
+
+ # Serve the API on port 7860 (the port Hugging Face Spaces expects)
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+
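
For local testing outside Spaces, the image can be built and run with the standard Docker CLI; the image tag here is illustrative, not part of the commit:

  docker build -t llama-api .
  docker run -p 7860:7860 llama-api

The endpoint is then reachable at http://localhost:7860/generate/.
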
app.py ADDED
@@ -0,0 +1,26 @@
+ from fastapi import FastAPI
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from peft import PeftModel
+ import torch
+
+ app = FastAPI()
+
+ # Load the base model and LoRA adapter from the Hugging Face Hub
+ base_model_path = "NousResearch/Hermes-3-Llama-3.2-3B"
+ adapter_path = "thinkingnew/llama_invs_adapter/adapter"
+
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_path, torch_dtype=torch.float16, device_map="auto"
+ )
+ model = PeftModel.from_pretrained(base_model, adapter_path)
+ tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+
+ # Build the generation pipeline once at startup rather than on every request
+ pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)
+
+ @app.post("/generate/")
+ async def generate_text(prompt: str):
+     # Note: [INST] tags are the Llama-2 prompt format; Hermes-3 models are
+     # trained on ChatML, so this template may need adjusting
+     result = pipe(f"<s>[INST] {prompt} [/INST]")
+     return {"response": result[0]["generated_text"]}
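
A minimal client sketch for exercising the endpoint, assuming the server is reachable on localhost:7860 (the prompt text is illustrative). Because the handler declares a bare prompt: str parameter, FastAPI treats it as a query parameter, so the prompt travels in the URL rather than in the request body:

  import requests

  # The prompt is sent as a query parameter, matching the handler signature
  resp = requests.post(
      "http://localhost:7860/generate/",
      params={"prompt": "Explain LoRA adapters in one sentence."},
  )
  print(resp.json()["response"])
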
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi
+ uvicorn[standard]
+ transformers
+ torch
+ peft