thinkingnew committed
Commit 715110f · 1 Parent(s): be65975

Initial LLaMA API commit

Files changed (3):
  1. Dockerfile +21 -0
  2. app.py +26 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
+ # Use Python 3.10 as the base image (change to 3.9 if needed)
+ FROM python:3.10
+
+ # Create a non-root user and switch to it
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Copy and install dependencies first (helps with layer caching)
+ COPY --chown=user requirements.txt .
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ # Copy the rest of the files
+ COPY --chown=user . .
+
+ # Serve the API on port 7860 (the port Hugging Face Spaces expects)
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+
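
For local testing outside Spaces, the image can be built and run with the standard Docker CLI; the image tag here is illustrative, not part of the commit:

  docker build -t llama-api .
  docker run -p 7860:7860 llama-api

The endpoint is then reachable at http://localhost:7860/generate/.
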
app.py ADDED
@@ -0,0 +1,26 @@
+ from fastapi import FastAPI
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from peft import PeftModel
+ import torch
+
+ app = FastAPI()
+
+ # Load the base model and LoRA adapter from the Hugging Face Hub
+ base_model_path = "NousResearch/Hermes-3-Llama-3.2-3B"
+ adapter_path = "thinkingnew/llama_invs_adapter/adapter"
+
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_path, torch_dtype=torch.float16, device_map="auto"
+ )
+ model = PeftModel.from_pretrained(base_model, adapter_path)
+ tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+
+ # Build the generation pipeline once at startup rather than on every request
+ pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)
+
+ @app.post("/generate/")
+ async def generate_text(prompt: str):
+     # Note: [INST] tags are the Llama-2 prompt format; Hermes-3 models are
+     # trained on ChatML, so this template may need adjusting
+     result = pipe(f"<s>[INST] {prompt} [/INST]")
+     return {"response": result[0]["generated_text"]}
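
A minimal client sketch for exercising the endpoint, assuming the server is reachable on localhost:7860 (the prompt text is illustrative). Because the handler declares a bare prompt: str parameter, FastAPI treats it as a query parameter, so the prompt travels in the URL rather than in the request body:

  import requests

  # The prompt is sent as a query parameter, matching the handler signature
  resp = requests.post(
      "http://localhost:7860/generate/",
      params={"prompt": "Explain LoRA adapters in one sentence."},
  )
  print(resp.json()["response"])
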
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi
+ uvicorn[standard]
+ transformers
+ torch
+ peft