# Necessary imports
import sys
from typing import Any, Dict

import gradio as gr
import spaces

# Local imports
from src.utils.video_processing import encode_video
from src.config import (
    device,
    model_name,
    sampling,
    stream,
    repetition_penalty,
)
from src.app.model import load_model_tokenizer_and_processor
from src.logger import logging
from src.exception import CustomExceptionHandling
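
# Shape of the values imported from src.config (illustrative assumptions only;
# the actual settings live in src/config.py and are not shown here):
#
#   device = "cuda"           # target device for inference
#   model_name = "..."        # Hugging Face id of the video chat model
#   sampling = True           # use sampling-based decoding in model.chat
#   stream = True             # stream generated text chunk by chunk
#   repetition_penalty = 1.05 # mild penalty against repeated tokens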

# Model, tokenizer and processor
model, tokenizer, processor = load_model_tokenizer_and_processor(model_name, device)
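
# load_model_tokenizer_and_processor is assumed to wrap the usual transformers
# loading pattern, roughly (a sketch, not the project's actual source):
#
#   model = AutoModel.from_pretrained(model_name, trust_remote_code=True).to(device).eval()
#   tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
#   processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)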


# On a ZeroGPU Space ("Running on Zero"), the inference handler must be
# decorated with @spaces.GPU so a GPU is attached for the duration of the call.
@spaces.GPU
def describe_video(
    video: str,
    question: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_new_tokens: int,
) -> str:
| """ | |
| Describes a video by generating an answer to a given question. | |
| Args: | |
| - video (str): The path to the video file. | |
| - question (str): The question to be answered about the video. | |
| - temperature (float): The temperature parameter for the model. | |
| - top_p (float): The top_p parameter for the model. | |
| - top_k (int): The top_k parameter for the model. | |
| - max_new_tokens (int): The max tokens to be generated by the model. | |
| Returns: | |
| str: The generated answer to the question. | |
| """ | |
    try:
        # Return early if the video or question is missing; otherwise
        # encode_video would be called with None and fail downstream.
        if not video or not question:
            gr.Warning("Please provide a video and a question.")
            return ""

        # Encode the video frames
        frames = encode_video(video)
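        # encode_video is assumed to return a list of PIL.Image frames sampled
        # uniformly from the clip (the usual pattern for video chat models);
        # the exact sampling strategy lives in src.utils.video_processing.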

        # Message format for the model
        msgs = [{"role": "user", "content": frames + [question]}]
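        # "content" is assumed to be a flat list interleaving image frames and
        # text (MiniCPM-V style), so the question string is appended after the
        # frames rather than sent as a separate field.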

        # Set decode params for video
        params: Dict[str, Any] = {
            "use_image_id": False,
            "max_slice_nums": 1,  # Use 1 if CUDA OOM and video resolution > 448*448
        }
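        # use_image_id and max_slice_nums are MiniCPM-V-style decode options,
        # forwarded to model.chat via **params below; which extra kwargs are
        # accepted depends on the checkpoint behind model_name.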

        # Generate the answer
        answer = model.chat(
            image=None,
            msgs=msgs,
            tokenizer=tokenizer,
            processor=processor,
            sampling=sampling,
            stream=stream,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
            **params,
        )
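        # With stream=True, model.chat is expected to yield text chunks rather
        # than return one string, hence the join below; with stream=False the
        # join over the returned string is effectively a no-op.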

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")

        # Return the answer
        return "".join(answer)

    # Handle exceptions that may occur during answer generation
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
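

# A minimal wiring sketch for local testing. The Space presumably builds its
# real UI elsewhere (e.g. under src/app), so the component labels, ranges and
# defaults below are illustrative assumptions, not the project's actual layout.
if __name__ == "__main__":
    demo = gr.Interface(
        fn=describe_video,
        inputs=[
            gr.Video(label="Video"),
            gr.Textbox(label="Question", placeholder="What is happening in this video?"),
            gr.Slider(0.1, 2.0, value=0.7, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.8, label="Top-p"),
            gr.Slider(1, 200, value=100, step=1, label="Top-k"),
            gr.Slider(1, 4096, value=512, step=1, label="Max new tokens"),
        ],
        outputs=gr.Textbox(label="Answer"),
        title="Video Question Answering",
    )
    demo.launch()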