from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import resource

app = FastAPI()

# The original os.system("ulimit -l unlimited") had no effect: it raised the limit
# of a throwaway subshell only. Raise RLIMIT_MEMLOCK in-process instead, so that
# use_mlock below can actually pin the model weights in RAM.
try:
    resource.setrlimit(resource.RLIMIT_MEMLOCK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
except (ValueError, OSError):
    pass  # not permitted in this environment; llama.cpp falls back to unlocked memory

# Fetch the 4-bit (Q4_K_M) GGUF weights from the Hub; cached after the first download.
hf_hub_download("TheBloke/deepseek-coder-6.7B-base-GGUF", "deepseek-coder-6.7b-base.Q4_K_M.gguf", local_dir="./")

# CPU-only inference: 16k context, no GPU layers, two threads, weights locked in memory.
model_l = Llama(model_path="./deepseek-coder-6.7b-base.Q4_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
@app.get("/")  # simple health-check route
async def index():
    return {"msg": "OK!"}
@app.post("/completion")  # route decorator required by FastAPI; the exact path is an assumption
async def completion(request: Request):
    data = await request.json()
    prompt = data["prompt"]
    # Generate up to 41 new tokens at moderate temperature; the prompt is not echoed back.
    res = model_l(
        prompt,
        temperature=0.6,
        echo=False,
        max_tokens=41,
    )
    return {"responses": res["choices"]}
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)  # 7860 is the default Spaces port
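
Once the Space is running, the endpoint can be exercised with a short client. A minimal sketch, assuming the server is reachable at http://localhost:7860 and that the route is mounted at /completion as in the handler above:

import requests

# Hypothetical client for the server above; the host, port, and /completion path
# are assumptions, so adjust them to the actual Space URL.
resp = requests.post(
    "http://localhost:7860/completion",
    json={"prompt": "def fibonacci(n):"},
    timeout=120,  # CPU-only inference on two threads can take a while
)
resp.raise_for_status()
for choice in resp.json()["responses"]:
    print(choice["text"])  # each llama.cpp choice carries its generated text

Since max_tokens is 41, each call returns only a short snippet; a client that needs longer completions would have to loop, appending the generated text to the prompt on each call.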