from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import resource

app = FastAPI()

# The original os.system("ulimit -l unlimited") had no effect: it raised the limit
# of a throwaway subshell only. Raise RLIMIT_MEMLOCK in-process instead, so that
# use_mlock below can actually pin the model weights in RAM.
try:
    resource.setrlimit(resource.RLIMIT_MEMLOCK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
except (ValueError, OSError):
    pass  # not permitted in this environment; llama.cpp falls back to unlocked memory

# Fetch the 4-bit (Q4_K_M) GGUF weights from the Hub; cached after the first download.
hf_hub_download("TheBloke/deepseek-coder-6.7B-base-GGUF", "deepseek-coder-6.7b-base.Q4_K_M.gguf", local_dir="./")

# CPU-only inference: 16k context, no GPU layers, two threads, weights locked in memory.
model_l = Llama(model_path="./deepseek-coder-6.7b-base.Q4_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
@app.get("/")  # simple health-check route
async def index():
    return {"msg": "OK!"}
@app.post("/completion")  # route decorator required by FastAPI; the exact path is an assumption
async def completion(request: Request):
    data = await request.json()
    prompt = data["prompt"]
    # Generate up to 41 new tokens at moderate temperature; the prompt is not echoed back.
    res = model_l(
        prompt,
        temperature=0.6,
        echo=False,
        max_tokens=41,
    )
    return {"responses": res["choices"]}
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)  # 7860 is the default Spaces port
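
Once the Space is running, the endpoint can be exercised with a short client. A minimal sketch, assuming the server is reachable at http://localhost:7860 and that the route is mounted at /completion as in the handler above:

import requests

# Hypothetical client for the server above; the host, port, and /completion path
# are assumptions, so adjust them to the actual Space URL.
resp = requests.post(
    "http://localhost:7860/completion",
    json={"prompt": "def fibonacci(n):"},
    timeout=120,  # CPU-only inference on two threads can take a while
)
resp.raise_for_status()
for choice in resp.json()["responses"]:
    print(choice["text"])  # each llama.cpp choice carries its generated text

Since max_tokens is 41, each call returns only a short snippet; a client that needs longer completions would have to loop, appending the generated text to the prompt on each call.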