|
|
import os |
|
|
import json |
|
|
import asyncio |
|
|
from typing import Optional, List, Dict |
|
|
from contextlib import asynccontextmanager |
|
|
|
|
|
import requests |
|
|
import uvicorn |
|
|
from fastapi import FastAPI, HTTPException |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
from pydantic import BaseModel, Field |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
|
|
|
class Message(BaseModel):
    """A single chat message in an LLM conversation."""

    # Speaker of the message, e.g. "user" or "assistant" (see create_gradio_app).
    role: str
    # Plain-text content of the message.
    content: str
|
|
|
|
|
|
|
|
class ChatRequest(BaseModel):
    """Request payload for the /chat endpoint, forwarded to the Fireworks API."""

    # Full conversation history, oldest first.
    messages: List[Message]
    # Fireworks model identifier.
    model: str = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
    # Maximum number of tokens to generate (1..8192).
    max_tokens: int = Field(default=4096, ge=1, le=8192)
    # Sampling temperature (0..2); higher is more random.
    temperature: float = Field(default=0.6, ge=0, le=2)
    # Nucleus-sampling probability mass (0..1).
    top_p: float = Field(default=1.0, ge=0, le=1)
    # Top-k sampling cutoff (1..100).
    top_k: int = Field(default=40, ge=1, le=100)
    # Penalties in the OpenAI-compatible range (-2..2).
    presence_penalty: float = Field(default=0, ge=-2, le=2)
    frequency_penalty: float = Field(default=0, ge=-2, le=2)
|
|
|
|
|
|
|
|
class ChatResponse(BaseModel):
    """Response payload returned by the /chat endpoint."""

    # Assistant reply text extracted from the first API choice.
    response: str
    # Model identifier echoed back from the request.
    model: str
    # Total tokens reported by the API's "usage" field, when present.
    tokens_used: Optional[int] = None
|
|
|
|
|
|
|
|
|
|
|
class FireworksClient:
    """Thin synchronous client for the Fireworks chat-completions REST API."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the client.

        Args:
            api_key: Fireworks API key; falls back to the FIREWORKS_API_KEY
                environment variable when omitted.

        Raises:
            ValueError: if no API key is available from either source.
        """
        self.api_key = api_key or os.getenv("FIREWORKS_API_KEY")
        if not self.api_key:
            raise ValueError("API key is required. Set FIREWORKS_API_KEY environment variable.")

        self.base_url = "https://api.fireworks.ai/inference/v1/chat/completions"
        self.headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    def chat(self, request: ChatRequest) -> Dict:
        """Send a chat-completion request to the Fireworks API.

        Args:
            request: the validated chat parameters and message history.

        Returns:
            The parsed JSON response body from the API.

        Raises:
            HTTPException: with status 500 when the HTTP request fails
                (network error, timeout, or non-2xx status).
        """
        # NOTE(review): request.messages uses pydantic v1's .dict(); if the
        # project moves to pydantic v2, switch to .model_dump().
        payload = {
            "model": request.model,
            "max_tokens": request.max_tokens,
            "top_p": request.top_p,
            "top_k": request.top_k,
            "presence_penalty": request.presence_penalty,
            "frequency_penalty": request.frequency_penalty,
            "temperature": request.temperature,
            "messages": [msg.dict() for msg in request.messages],
        }

        try:
            # `json=` lets requests serialize the payload itself — idiomatic
            # replacement for data=json.dumps(payload).
            response = requests.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=30,
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Chain the cause so the original network error stays visible.
            raise HTTPException(status_code=500, detail=f"API request failed: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
def create_gradio_app(client: FireworksClient):
    """Build and return the Gradio Blocks UI wired to the given Fireworks client."""

    def chat_with_llm(
        message: str,
        history: List[List[str]],
        model: str,
        temperature: float,
        max_tokens: int,
        top_p: float,
        top_k: int
    ):
        """Gradio chat callback: forward the conversation to the LLM and append the reply.

        Returns ("", updated_history) so the textbox clears after each turn.
        """
        # Ignore empty submissions; leave the history unchanged.
        if not message:
            return "", history

        # Rebuild the full message list from Gradio's [user, assistant] pairs.
        messages = []
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append(Message(role="user", content=user_msg))
            if assistant_msg:
                messages.append(Message(role="assistant", content=assistant_msg))

        messages.append(Message(role="user", content=message))

        try:
            request = ChatRequest(
                messages=messages,
                model=model,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                top_k=top_k
            )

            response = client.chat(request)

            # Take the assistant text from the first choice when present.
            if "choices" in response and len(response["choices"]) > 0:
                assistant_response = response["choices"][0]["message"]["content"]
            else:
                assistant_response = "μλ΅μ λ°μ μ μμ΅λλ€."

            history.append([message, assistant_response])
            return "", history

        except Exception as e:
            # Surface any failure in the chat window instead of crashing the UI.
            error_msg = f"μ€λ₯ λ°μ: {str(e)}"
            history.append([message, error_msg])
            return "", history

    with gr.Blocks(title="LLM Chat Interface") as demo:
        gr.Markdown("# π Fireworks LLM Chat Interface")
        gr.Markdown("Qwen3-235B λͺ¨λΈμ μ¬μ©ν μ±νμΈν°νμ΄μ€μλλ€.")

        with gr.Row():
            # Left column: chat window, message box, and action buttons.
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    height=500,
                    label="μ±νμ°½"
                )
                msg = gr.Textbox(
                    label="λ©μμ§ μλ ₯",
                    placeholder="λ©μμ§λ₯Ό μλ ₯νμΈμ...",
                    lines=2
                )
                with gr.Row():
                    submit = gr.Button("μ μ‘", variant="primary")
                    clear = gr.Button("λν μ΄κΈ°ν")

            # Right column: sampling-parameter controls.
            with gr.Column(scale=1):
                gr.Markdown("### βοΈ μ€μ ")
                model = gr.Textbox(
                    label="λͺ¨λΈ",
                    value="accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
                    interactive=True
                )
                temperature = gr.Slider(
                    minimum=0,
                    maximum=2,
                    value=0.6,
                    step=0.1,
                    label="Temperature"
                )
                max_tokens = gr.Slider(
                    minimum=100,
                    maximum=8192,
                    value=4096,
                    step=100,
                    label="Max Tokens"
                )
                top_p = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=1.0,
                    step=0.1,
                    label="Top P"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=40,
                    step=1,
                    label="Top K"
                )

        # Both the button click and Enter in the textbox trigger the same handler.
        submit.click(
            chat_with_llm,
            inputs=[msg, chatbot, model, temperature, max_tokens, top_p, top_k],
            outputs=[msg, chatbot]
        )

        msg.submit(
            chat_with_llm,
            inputs=[msg, chatbot, model, temperature, max_tokens, top_p, top_k],
            outputs=[msg, chatbot]
        )

        # Reset the chat window; queue=False so the clear happens immediately.
        clear.click(lambda: None, None, chatbot, queue=False)

    return demo
|
|
|
|
|
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: runs once at app startup and once at shutdown."""
    # Startup side: runs before the server begins accepting requests.
    print("π Starting FastAPI + Gradio server...")
    yield
    # Shutdown side: runs after the server stops accepting requests.
    print("π Shutting down server...")
|
|
|
|
|
|
|
|
# FastAPI application; `lifespan` handles startup/shutdown logging.
app = FastAPI(
    title="LLM API with Gradio Interface",
    description="Fireworks LLM API with Gradio testing interface",
    version="1.0.0",
    lifespan=lifespan
)

# NOTE(review): CORS is wide open (all origins, methods, headers, with
# credentials) — acceptable for local testing, tighten before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
|
|
# Construct the shared Fireworks client at import time. When no API key is
# configured we fall back to None so the server can still start; the /chat
# endpoint then answers with a 500 until a key is provided.
try:
    fireworks_client = FireworksClient()
except ValueError as e:
    print(f"β οΈ Warning: {e}")
    print("API endpoints will not work without a valid API key.")
    fireworks_client = None
|
|
|
|
|
|
|
|
|
|
|
@app.get("/")
async def root():
    """Root endpoint: service banner plus a map of the available routes."""
    available = {
        "api": "/chat",
        "gradio": "/gradio",
        "docs": "/docs",
    }
    return {"message": "LLM API Server is running", "endpoints": available}
|
|
|
|
|
|
|
|
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Chat API endpoint: proxy the request to Fireworks and shape the reply."""
    # Guard: the module-level client is None when no API key was configured.
    if not fireworks_client:
        raise HTTPException(status_code=500, detail="API key not configured")

    try:
        api_response = fireworks_client.chat(request)

        # An empty or missing "choices" list means the API reply is unusable.
        choices = api_response.get("choices")
        if not choices:
            raise HTTPException(status_code=500, detail="Invalid response from API")

        content = choices[0]["message"]["content"]
        tokens = api_response.get("usage", {}).get("total_tokens")
        return ChatResponse(
            response=content,
            model=request.model,
            tokens_used=tokens,
        )
    except HTTPException:
        # Already shaped for FastAPI — propagate untouched.
        raise
    except Exception as e:
        # Anything unexpected becomes a generic 500 with the error text.
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
@app.get("/health")
async def health_check():
    """Health-check endpoint: reports liveness and whether an API key is configured."""
    api_ready = fireworks_client is not None
    return {"status": "healthy", "api_configured": api_ready}
|
|
|
|
|
|
|
|
|
|
|
# Mount the Gradio UI under /gradio, but only when a client could be
# constructed (i.e. an API key was available at import time).
if fireworks_client:
    gradio_app = create_gradio_app(fireworks_client)
    app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # (fix) removed `import sys` — it was imported here but never used.

    # Interactive fallback: when the environment lacks an API key, prompt for
    # one, then rebuild the client and remount the Gradio UI on the app.
    if not os.getenv("FIREWORKS_API_KEY"):
        print("β οΈ κ²½κ³ : FIREWORKS_API_KEY νκ²½λ³μκ° μ€μ λμ§ μμμ΅λλ€.")
        print("μ€μ λ°©λ²:")
        print("  Linux/Mac: export FIREWORKS_API_KEY='your-api-key'")
        print("  Windows: set FIREWORKS_API_KEY=your-api-key")
        print("")

        api_key = input("API ν€λ₯Ό μλ ₯νμΈμ (Enterλ₯Ό λλ₯΄λ©΄ 건λλλλ€): ").strip()
        if api_key:
            os.environ["FIREWORKS_API_KEY"] = api_key
            fireworks_client = FireworksClient(api_key)
            gradio_app = create_gradio_app(fireworks_client)
            app = gr.mount_gradio_app(app, gradio_app, path="/gradio")

    print("\nπ μλ²λ₯Ό μμν©λλ€...")
    print("π API λ¬Έμ: http://localhost:7860/docs")
    print("π¬ Gradio UI: http://localhost:7860/gradio")
    print("π§ API μλν¬μΈνΈ: http://localhost:7860/chat")

    # Serve FastAPI + mounted Gradio on a single port.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        reload=False
    )