Spaces:

marquesafonso
/

multilang-asr-captioner

Sleeping

App Files Files Community

marquesafonso committed on Aug 22, 2024

Commit

d0c0836

1 Parent(s): 2dcfc88

add height ratio for better caption mode. pydantic v2 field validation. add archiver to main.

Browse files

Files changed (6) hide show

docker-compose.yml +0 -2
main.py +12 -6
static/submit_video.html +2 -2
utils/archiver.py +1 -1
utils/process_video.py +3 -3
utils/subtitler.py +12 -9

docker-compose.yml CHANGED Viewed

@@ -1,5 +1,3 @@
-version: '3.8'
 services:
   app:
     build:

 services:
   app:
     build:

main.py CHANGED Viewed

@@ -2,11 +2,12 @@ from fastapi import FastAPI, UploadFile, HTTPException, Form, Depends
 from fastapi.responses import FileResponse, HTMLResponse
 from fastapi.security import HTTPBasic, HTTPBasicCredentials
 from typing import Optional
-from pydantic import BaseModel, validator
 from utils.process_video import process_video
 from utils.zip_response import zip_response
 from utils.api_configs import api_configs
 from utils.read_html import read_html
 import shutil, os, logging, uvicorn, secrets
 # TODO: upgrade project dependencies once the upcoming faster-whisper release that supports distil-large-v3 is available
@@ -42,7 +43,7 @@ class MP4Video(BaseModel):
     def file(self):
         return self.video_file.file
-    @validator('video_file')
     def validate_video_file(cls, v):
         if not v.filename.endswith('.mp4'):
             raise HTTPException(status_code=500, detail='Invalid video file type. Please upload an MP4 file.')
@@ -61,7 +62,7 @@ class SRTFile(BaseModel):
     def size(self):
         return self.srt_file.size
-    @validator('srt_file')
     def validate_srt_file(cls, v):
         if v.size > 0 and not v.filename.endswith('.srt'):
             raise HTTPException(status_code=422, detail='Invalid subtitle file type. Please upload an SRT file.')
@@ -92,11 +93,12 @@ async def process_video_api(video_file: MP4Video = Depends(),
                             font: Optional[str] = Form("FuturaPTHeavy"),
                             bg_color: Optional[str] = Form("#070a13b3"),
                             text_color: Optional[str] = Form("white"),
-                            caption_width: Optional[str] = Form("desktop"),
                             username: str = Depends(get_current_user)
                             ):
     try:
         logging.info("Creating temporary directories")
         temp_dir = os.path.join(os.getcwd(),"temp")
         os.makedirs(temp_dir, exist_ok=True)
         temp_vid_dir = os.path.join(temp_dir,video_file.filename.split('.')[0])
@@ -117,12 +119,12 @@ async def process_video_api(video_file: MP4Video = Depends(),
                 finally:
                     srt_file.file.close()
             logging.info("Processing the video...")
-            output_path, _ = process_video(temp_input_path, SRT_PATH, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_width)
             logging.info("Zipping response...")
             zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
             return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
         logging.info("Processing the video...")
-        output_path, srt_path = process_video(temp_input_path, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_width)
         logging.info("Zipping response...")
         zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
         return  FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
@@ -132,4 +134,8 @@ async def process_video_api(video_file: MP4Video = Depends(),
 if __name__ == "__main__":
     # Use Uvicorn to run the application
     uvicorn.run(app, host="0.0.0.0", port=8000)

 from fastapi.responses import FileResponse, HTMLResponse
 from fastapi.security import HTTPBasic, HTTPBasicCredentials
 from typing import Optional
+from pydantic import BaseModel, field_validator
 from utils.process_video import process_video
 from utils.zip_response import zip_response
 from utils.api_configs import api_configs
 from utils.read_html import read_html
+from utils.archiver import archiver
 import shutil, os, logging, uvicorn, secrets
 # TODO: upgrade project dependencies once the upcoming faster-whisper release that supports distil-large-v3 is available
     def file(self):
         return self.video_file.file
+    @field_validator('video_file')
     def validate_video_file(cls, v):
         if not v.filename.endswith('.mp4'):
             raise HTTPException(status_code=500, detail='Invalid video file type. Please upload an MP4 file.')
     def size(self):
         return self.srt_file.size
+    @field_validator('srt_file')
     def validate_srt_file(cls, v):
         if v.size > 0 and not v.filename.endswith('.srt'):
             raise HTTPException(status_code=422, detail='Invalid subtitle file type. Please upload an SRT file.')
                             font: Optional[str] = Form("FuturaPTHeavy"),
                             bg_color: Optional[str] = Form("#070a13b3"),
                             text_color: Optional[str] = Form("white"),
+                            caption_mode: Optional[str] = Form("desktop"),
                             username: str = Depends(get_current_user)
                             ):
     try:
         logging.info("Creating temporary directories")
+        print(caption_mode)
         temp_dir = os.path.join(os.getcwd(),"temp")
         os.makedirs(temp_dir, exist_ok=True)
         temp_vid_dir = os.path.join(temp_dir,video_file.filename.split('.')[0])
                 finally:
                     srt_file.file.close()
             logging.info("Processing the video...")
+            output_path, _ = process_video(temp_input_path, SRT_PATH, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
             return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
         logging.info("Processing the video...")
+        output_path, srt_path = process_video(temp_input_path, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
         logging.info("Zipping response...")
         zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
         return  FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
 if __name__ == "__main__":
     # Use Uvicorn to run the application
+    try:
+        archiver()
+    except FileNotFoundError:
+        pass
     uvicorn.run(app, host="0.0.0.0", port=8000)

static/submit_video.html CHANGED Viewed

@@ -177,8 +177,8 @@
                     <input type="text" id="bg_color" name="bg_color" value="#00FFFF00"><br>
                     <label for="text_color">Text color</label>
                     <input type="text" id="text_color" name="text_color" value="white"><br>
-                    <label for="caption_width">Caption width</label>
-                    <select id="caption_width" name="caption_width">
                         <option value="desktop">Desktop</option>
                         <option value="mobile">Mobile</option>
                     </select>

                     <input type="text" id="bg_color" name="bg_color" value="#00FFFF00"><br>
                     <label for="text_color">Text color</label>
                     <input type="text" id="text_color" name="text_color" value="white"><br>
+                    <label for="caption_mode">Caption mode</label>
+                    <select id="caption_mode" name="caption_mode">
                         <option value="desktop">Desktop</option>
                         <option value="mobile">Mobile</option>
                     </select>

utils/archiver.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import shutil, os
 from datetime import datetime
-def archiver(timestamp:datetime):
     TIME = f"{timestamp.year:4d}-{timestamp.month:02d}-{timestamp.day:02d}_{timestamp.hour:02d}-{timestamp.minute:02d}"
     ARCHIVE = os.path.abspath(f"archive/{TIME}")
     TEMP_DIR = os.path.abspath("temp/")

 import shutil, os
 from datetime import datetime
+def archiver(timestamp:datetime=datetime.now()):
     TIME = f"{timestamp.year:4d}-{timestamp.month:02d}-{timestamp.day:02d}_{timestamp.hour:02d}-{timestamp.minute:02d}"
     ARCHIVE = os.path.abspath(f"archive/{TIME}")
     TEMP_DIR = os.path.abspath("temp/")

utils/process_video.py CHANGED Viewed

@@ -19,14 +19,14 @@ def process_video(invideo_filename:str,
                   font:str,
                   bg_color:str,
                   text_color:str,
-                  caption_width:str
                   ):
     invideo_filename = os.path.normpath(invideo_filename)
     invideo_path_parts = invideo_filename.split(os.path.sep)
     VIDEO_NAME = invideo_path_parts[-1]
     OUTVIDEO_PATH = os.path.join(invideo_path_parts[-3], invideo_path_parts[-2], f"result_{VIDEO_NAME}")
     if srt_path:
-        subtitler(invideo_filename, srt_path, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_width)
         return OUTVIDEO_PATH, srt_path
     logging.info("Converting Video to Audio")
     INAUDIO_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.m4a")
@@ -37,5 +37,5 @@ def process_video(invideo_filename:str,
     if not os.path.exists(SRT_PATH):
         transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line, task)
     logging.info("Subtitling...")
-    subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_width)
     return OUTVIDEO_PATH, SRT_PATH

                   font:str,
                   bg_color:str,
                   text_color:str,
+                  caption_mode:str
                   ):
     invideo_filename = os.path.normpath(invideo_filename)
     invideo_path_parts = invideo_filename.split(os.path.sep)
     VIDEO_NAME = invideo_path_parts[-1]
     OUTVIDEO_PATH = os.path.join(invideo_path_parts[-3], invideo_path_parts[-2], f"result_{VIDEO_NAME}")
     if srt_path:
+        subtitler(invideo_filename, srt_path, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
         return OUTVIDEO_PATH, srt_path
     logging.info("Converting Video to Audio")
     INAUDIO_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.m4a")
     if not os.path.exists(SRT_PATH):
         transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line, task)
     logging.info("Subtitling...")
+    subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     return OUTVIDEO_PATH, SRT_PATH

utils/subtitler.py CHANGED Viewed

@@ -19,12 +19,14 @@ def parse_srt(srt_file):
             i += 1
     return subtitles
-def filter_caption_width(caption_width:str='desktop'):
-    if caption_width == 'desktop':
-        caption_width = 0.2
-    elif caption_width == 'mobile':
-        caption_width = 0.5
-    return caption_width
 def subtitler(video_file:str,
             srt_path:str,
@@ -33,7 +35,7 @@ def subtitler(video_file:str,
             font: str,
             bg_color:str,
             text_color:str,
-            caption_width:str
             ):
     """Add subtitles from an SRT file to a video."""
     video_file = os.path.abspath(video_file)
@@ -42,14 +44,15 @@ def subtitler(video_file:str,
     clip = VideoFileClip(filename=video_file, target_resolution=None)
     subtitles = parse_srt(srt_path)
     subtitle_clips = []
     for start, end, text in subtitles:
         # Create TextClip with specified styling
         # To get a list of possible color and font values run: print(TextClip.list("font"), '\n\n', TextClip.list("color"))
         txt_clip = TextClip(text, fontsize=fontsize, color=text_color, font=font, method='caption',
-                            bg_color=bg_color, align='center', size=(clip.w*filter_caption_width(caption_width), None))
         txt_clip = txt_clip.set_position(('center', 'bottom')).set_duration(clip.duration).set_start(start).set_end(end)
         subtitle_x_position = 'center'
-        subtitle_y_position = clip.h * 4 / 5
         text_position = (subtitle_x_position, subtitle_y_position)
         subtitle_clips.append(txt_clip.set_position(text_position))
     video = CompositeVideoClip(size=None, clips=[clip] + subtitle_clips)

             i += 1
     return subtitles
def filter_caption_width(caption_mode:str):
    """Return the caption layout ratios for the given caption mode.

    Args:
        caption_mode: Either 'desktop' or 'mobile'.

    Returns:
        A ``(caption_width_ratio, caption_height_ratio)`` tuple:
        ``(0.5, 0.8)`` for 'desktop' and ``(0.2, 0.7)`` for 'mobile'.

    Raises:
        ValueError: If ``caption_mode`` is not one of the supported modes.
    """
    ratios_by_mode = {
        'desktop': (0.5, 0.8),
        'mobile': (0.2, 0.7),
    }
    if caption_mode not in ratios_by_mode:
        # Without this guard an unknown mode would leave both ratios
        # unassigned and surface as an opaque UnboundLocalError at return.
        valid_modes = ", ".join(repr(mode) for mode in ratios_by_mode)
        raise ValueError(
            f"Unsupported caption mode {caption_mode!r}; expected one of: {valid_modes}"
        )
    caption_width_ratio, caption_height_ratio = ratios_by_mode[caption_mode]
    return caption_width_ratio, caption_height_ratio
 def subtitler(video_file:str,
             srt_path:str,
             font: str,
             bg_color:str,
             text_color:str,
+            caption_mode:str
             ):
     """Add subtitles from an SRT file to a video."""
     video_file = os.path.abspath(video_file)
     clip = VideoFileClip(filename=video_file, target_resolution=None)
     subtitles = parse_srt(srt_path)
     subtitle_clips = []
+    caption_width_ratio, caption_height_ratio = filter_caption_width(caption_mode)
     for start, end, text in subtitles:
         # Create TextClip with specified styling
         # To get a list of possible color and font values run: print(TextClip.list("font"), '\n\n', TextClip.list("color"))
         txt_clip = TextClip(text, fontsize=fontsize, color=text_color, font=font, method='caption',
+                            bg_color=bg_color, align='center', size=(clip.w*caption_width_ratio, None))
         txt_clip = txt_clip.set_position(('center', 'bottom')).set_duration(clip.duration).set_start(start).set_end(end)
         subtitle_x_position = 'center'
+        subtitle_y_position = clip.h * caption_height_ratio
         text_position = (subtitle_x_position, subtitle_y_position)
         subtitle_clips.append(txt_clip.set_position(text_position))
     video = CompositeVideoClip(size=None, clips=[clip] + subtitle_clips)