| """ | |
| Pydantic models and data structures for PDF Insight Beta application. | |
| This module defines all the data models used throughout the application. | |
| """ | |
| from typing import List, Dict, Any, Optional | |
| from pydantic import BaseModel, Field | |
| class ChatRequest(BaseModel): | |
| """Request model for chat endpoint.""" | |
| session_id: str = Field(..., description="Session identifier") | |
| query: str = Field(..., description="User query") | |
| use_search: bool = Field(default=False, description="Whether to use web search") | |
| model_name: str = Field( | |
| default="meta-llama/llama-4-scout-17b-16e-instruct", | |
| description="LLM model to use" | |
| ) | |
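# Illustrative example (not taken from the original source): a JSON body such as
# {"session_id": "abc-123", "query": "Summarize section 2"} validates into a
# ChatRequest, with use_search and model_name filled in from their defaults.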
class SessionRequest(BaseModel):
    """Request model for session-related endpoints."""
    session_id: str = Field(..., description="Session identifier")


class UploadResponse(BaseModel):
    """Response model for PDF upload."""
    status: str
    session_id: str
    message: str


class ChatResponse(BaseModel):
    """Response model for chat endpoint."""
    status: str
    answer: str
    context_used: List[Dict[str, Any]]


class ChatHistoryResponse(BaseModel):
    """Response model for chat history endpoint."""
    status: str
    history: List[Dict[str, str]]


class StatusResponse(BaseModel):
    """Generic status response model."""
    status: str
    message: str


class ErrorResponse(BaseModel):
    """Error response model."""
    status: str
    detail: str
    type: Optional[str] = None


class ModelInfo(BaseModel):
    """Model information."""
    id: str
    name: str


class ModelsResponse(BaseModel):
    """Response model for models endpoint."""
    models: List[ModelInfo]


class ChunkMetadata(BaseModel):
    """Metadata for document chunks."""
    source: Optional[str] = None
    page: Optional[int] = None

    class Config:
        extra = "allow"  # Allow additional metadata fields
class DocumentChunk(BaseModel):
    """Document chunk with text and metadata."""
    text: str
    metadata: ChunkMetadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary format used in processing."""
        return {
            "text": self.text,
            "metadata": self.metadata.dict()
        }
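# Usage sketch (illustrative; the file name and page number are made up):
# DocumentChunk(text="...", metadata=ChunkMetadata(source="report.pdf", page=2)).to_dict()
# returns {"text": "...", "metadata": {"source": "report.pdf", "page": 2}}, the plain-dict
# chunk shape stored in SessionData.chunks below.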
class SessionData(BaseModel):
    """Session data structure."""
    file_path: str
    file_name: str
    chunks: List[Dict[str, Any]]  # List of chunk dictionaries
    chat_history: List[Dict[str, str]] = Field(default_factory=list)

    class Config:
        arbitrary_types_allowed = True  # Allow non-Pydantic types like FAISS index


class ChatHistoryEntry(BaseModel):
    """Single chat history entry."""
    user: str
    assistant: str


class ContextChunk(BaseModel):
    """Context chunk with similarity score."""
    text: str
    score: float
    metadata: Dict[str, Any]
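

# Minimal self-check sketch (not part of the original application): builds a few of
# the models above with made-up values and round-trips a DocumentChunk through
# to_dict(), just to illustrate how the pieces fit together. Runs only when the
# module is executed directly.
if __name__ == "__main__":
    request = ChatRequest(session_id="demo-session", query="What is this PDF about?")
    print(request.dict())  # defaults for use_search and model_name are applied

    chunk = DocumentChunk(
        text="Example paragraph extracted from a PDF.",
        metadata=ChunkMetadata(source="example.pdf", page=1, chunk_id=0),  # extra key kept via extra="allow"
    )
    print(chunk.to_dict())

    response = ChatResponse(
        status="success",
        answer="The document describes an example topic.",
        context_used=[{**chunk.to_dict(), "score": 0.42}],
    )
    print(response.json())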