Spaces:

marquesafonso
/

multilang-asr-captioner

Sleeping

App Files Files Community

marquesafonso committed on Jul 18, 2024

Commit

2dcfc88

1 Parent(s): f489fc8

Improved form layout, styling, and responsiveness. Added a caption_width param for small screens. Fixed a resolution issue.

Browse files

Files changed (5) hide show

main.py +3 -2
static/submit_video.html +115 -41
utils/process_video.py +4 -3
utils/subtitler.py +12 -4
utils/transcriber.py +2 -5

main.py CHANGED Viewed

@@ -92,6 +92,7 @@ async def process_video_api(video_file: MP4Video = Depends(),
                             font: Optional[str] = Form("FuturaPTHeavy"),
                             bg_color: Optional[str] = Form("#070a13b3"),
                             text_color: Optional[str] = Form("white"),
                             username: str = Depends(get_current_user)
                             ):
     try:
@@ -116,12 +117,12 @@ async def process_video_api(video_file: MP4Video = Depends(),
                 finally:
                     srt_file.file.close()
             logging.info("Processing the video...")
-            output_path, _ = process_video(temp_input_path, SRT_PATH, task, max_words_per_line, fontsize, font, bg_color, text_color)
             logging.info("Zipping response...")
             zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
             return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
         logging.info("Processing the video...")
-        output_path, srt_path = process_video(temp_input_path, None, task, max_words_per_line, fontsize, font, bg_color, text_color)
         logging.info("Zipping response...")
         zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
         return  FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")

                             font: Optional[str] = Form("FuturaPTHeavy"),
                             bg_color: Optional[str] = Form("#070a13b3"),
                             text_color: Optional[str] = Form("white"),
+                            caption_width: Optional[str] = Form("desktop"),
                             username: str = Depends(get_current_user)
                             ):
     try:
                 finally:
                     srt_file.file.close()
             logging.info("Processing the video...")
+            output_path, _ = process_video(temp_input_path, SRT_PATH, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_width)
             logging.info("Zipping response...")
             zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, SRT_PATH])
             return FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")
         logging.info("Processing the video...")
+        output_path, srt_path = process_video(temp_input_path, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_width)
         logging.info("Zipping response...")
         zip_path = zip_response(os.path.join(temp_vid_dir,"archive.zip"), [output_path, srt_path])
         return  FileResponse(zip_path, media_type='application/zip', filename=f"result_{video_file.filename.split('.')[0]}.zip")

static/submit_video.html CHANGED Viewed

@@ -9,49 +9,70 @@
                 background-color: #f0f0f0;
                 color: #333;
                 line-height: 1.6;
             }
             form {
-                max-width: 400px;
-                margin: 10px auto;
-                padding: 4px 20px;
                 background: #ffffff;
                 border-radius: 8px;
-                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
-                padding-bottom: 0.5rem;
             }
             input[type=file],
             input[type=number],
-            input[type=text] {
-                width: 95%;
-                padding: 10px;
-                margin-bottom: 10px;
-                border-radius: 4px;
-                border: 1px solid #ddd;
-                box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
-            }
             select {
-                width: 30%;
                 padding: 10px;
                 margin-bottom: 10px;
                 border-radius: 4px;
                 border: 1px solid #ddd;
                 box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
             }
             input[type=submit] {
-                width: 25%;
                 background-color: #4CAF50;
                 color: white;
-                padding: 7.5px 15px;
                 border: none;
                 border-radius: 5px;
                 cursor: pointer;
-                font-size: 12px;
-                display: block;
-                margin-right: auto;
-                margin-left: auto;
             }
             input[type=submit]:hover {
@@ -61,13 +82,13 @@
             label {
                 margin-top: 10px;
                 display: block;
             }
             .footer {
                 width: 100%;
                 background-color: #f0f0f0;
-                padding: 0;
-                position: absolute;
                 text-align: center;
             }
@@ -77,40 +98,93 @@
             }
             .fa-github:hover {
-                transform: scale(1.2)
             }
-            .fa-github{
-                color: #000000
             }
             .fa-linkedin:hover {
-                transform: scale(1.2)
             }
             .fa-linkedin {
-                color: #0077B5
             }
             /* Additional Responsiveness */
             @media (max-width: 768px) {
                 form {
-                    width: 90%;
                 }
             }
         </style>
     </head>
     <body>
         <form action="/process_video/" enctype="multipart/form-data" method="post">
-            Video File: <input type="file" name="video_file"><br>
-            Subtitles File: <input type="file" name="srt_file"><br>
-            <label for="task">Task</label>
-            <select id="task" name="task">
-                <option value="transcribe">Transcribe</option>
-                <option value="translate">Translate</option>
-            </select><br>
-            Max words per line: <input type="number" name="max_words_per_line" value="6"><br>
-            Font size: <input type="number" name="fontsize" value="42"><br>
-            Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
-            Background color (Pro tip: #00FFFF00 = transparent): <input type="text" name="bg_color" value="#070a13b3"><br>
-            Text color: <input type="text" name="text_color" value="white"><br>
-            <input type="submit">
        </form>
         <!-- Footer -->
         <div class="footer">

                 background-color: #f0f0f0;
                 color: #333;
                 line-height: 1.6;
+                margin: 0;
+                padding: 0;
+                display: flex;
+                flex-direction: column;
+                min-height: 100vh;
             }
             form {
+                max-width: 900px;
+                margin: .9rem auto;
+                padding: 1rem;
                 background: #ffffff;
                 border-radius: 8px;
+                box-shadow: 0 0 15px rgba(0, 0, 0, 0.1);
+                display: flex;
+                flex-direction: column;
+            }
+            .form-wrapper {
+                display: flex;
+                flex-wrap: wrap;
+                gap: 20px;
+            }
+            .form-group {
+                flex: 1;
+                min-width: calc(50% - 20px);
+                box-sizing: border-box;
+            }
+            .form-group h3 {
+                margin-bottom: 15px;
+                color: #4CAF50;
+                font-size: 18px;
+                border-bottom: 2px solid #4CAF50;
+                padding-bottom: 5px;
             }
             input[type=file],
             input[type=number],
+            input[type=text],
             select {
+                width: 100%;
                 padding: 10px;
                 margin-bottom: 10px;
                 border-radius: 4px;
                 border: 1px solid #ddd;
                 box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
+                font-size: 13px;
+                box-sizing: border-box; /* Ensure padding and border are included in the element's total width and height */
             }
             input[type=submit] {
+                width: 100%;
                 background-color: #4CAF50;
                 color: white;
+                padding: 12px 18px;
                 border: none;
                 border-radius: 5px;
                 cursor: pointer;
+                font-size: 15px;
+                margin-top: 20px;
+                transition: background-color 0.3s ease;
+                box-sizing: border-box;
             }
             input[type=submit]:hover {
             label {
                 margin-top: 10px;
                 display: block;
+                font-weight: bold;
+                font-size: 13px;
             }
             .footer {
                 width: 100%;
                 background-color: #f0f0f0;
                 text-align: center;
             }
             }
             .fa-github:hover {
+                transform: scale(1.2);
             }
+            .fa-github {
+                color: #000000;
             }
             .fa-linkedin:hover {
+                transform: scale(1.2);
             }
             .fa-linkedin {
+                color: #0077B5;
             }
             /* Additional Responsiveness */
+            @media (max-width: 992px) {
+                form {
+                    max-width: 90%;
+                    margin-left: 15%;
+                    margin-right: 15%;
+                    padding: 15px;
+                }
+                .form-wrapper {
+                    flex-direction: column;
+                }
+                .form-group {
+                    min-width: 100%;
+                }
+            }
             @media (max-width: 768px) {
                 form {
+                    max-width: 90%;
+                    margin-left: 10%;
+                    margin-right: 10%;
+                    padding: 15px;
+                }
+            }
+            @media (max-width: 480px) {
+                form {
+                    max-width: 90%;
+                    margin-left: 5%;
+                    margin-right: 5%;
+                    padding: 10px;
                 }
             }
         </style>
     </head>
     <body>
         <form action="/process_video/" enctype="multipart/form-data" method="post">
+            <div class="form-wrapper">
+                <div class="form-group">
+                    <h3>Inputs & Task Selection</h3>
+                    <label for="video_file">Video File</label>
+                    <input type="file" id="video_file" name="video_file"><br>
+                    <label for="srt_file">Subtitles File</label>
+                    <input type="file" id="srt_file" name="srt_file"><br>
+                    <label for="task">Task</label>
+                    <select id="task" name="task">
+                        <option value="transcribe">Transcribe</option>
+                        <option value="translate">Translate</option>
+                    </select>
+                </div>
+                <div class="form-group">
+                    <h3>Visual Parameters</h3>
+                    <label for="max_words_per_line">Max words per line</label>
+                    <input type="number" id="max_words_per_line" name="max_words_per_line" value="6"><br>
+                    <label for="fontsize">Font size</label>
+                    <input type="number" id="fontsize" name="fontsize" value="42"><br>
+                    <label for="font">Font:</label>
+                    <input type="text" id="font" name="font" value="FuturaPTHeavy"><br>
+                    <label for="bg_color">Background color</label>
+                    <input type="text" id="bg_color" name="bg_color" value="#00FFFF00"><br>
+                    <label for="text_color">Text color</label>
+                    <input type="text" id="text_color" name="text_color" value="white"><br>
+                    <label for="caption_width">Caption width</label>
+                    <select id="caption_width" name="caption_width">
+                        <option value="desktop">Desktop</option>
+                        <option value="mobile">Mobile</option>
+                    </select>
+                </div>
+            </div>
+            <input type="submit" value="Submit">
        </form>
         <!-- Footer -->
         <div class="footer">

utils/process_video.py CHANGED Viewed

@@ -18,14 +18,15 @@ def process_video(invideo_filename:str,
                   fontsize:str,
                   font:str,
                   bg_color:str,
-                  text_color:str
                   ):
     invideo_filename = os.path.normpath(invideo_filename)
     invideo_path_parts = invideo_filename.split(os.path.sep)
     VIDEO_NAME = invideo_path_parts[-1]
     OUTVIDEO_PATH = os.path.join(invideo_path_parts[-3], invideo_path_parts[-2], f"result_{VIDEO_NAME}")
     if srt_path:
-        subtitler(invideo_filename, srt_path, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
         return OUTVIDEO_PATH, srt_path
     logging.info("Converting Video to Audio")
     INAUDIO_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.m4a")
@@ -36,5 +37,5 @@ def process_video(invideo_filename:str,
     if not os.path.exists(SRT_PATH):
         transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line, task)
     logging.info("Subtitling...")
-    subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
     return OUTVIDEO_PATH, SRT_PATH

                   fontsize:str,
                   font:str,
                   bg_color:str,
+                  text_color:str,
+                  caption_width:str
                   ):
     invideo_filename = os.path.normpath(invideo_filename)
     invideo_path_parts = invideo_filename.split(os.path.sep)
     VIDEO_NAME = invideo_path_parts[-1]
     OUTVIDEO_PATH = os.path.join(invideo_path_parts[-3], invideo_path_parts[-2], f"result_{VIDEO_NAME}")
     if srt_path:
+        subtitler(invideo_filename, srt_path, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_width)
         return OUTVIDEO_PATH, srt_path
     logging.info("Converting Video to Audio")
     INAUDIO_PATH = os.path.abspath(f"{invideo_filename.split('.')[0]}.m4a")
     if not os.path.exists(SRT_PATH):
         transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line, task)
     logging.info("Subtitling...")
+    subtitler(invideo_filename, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_width)
     return OUTVIDEO_PATH, SRT_PATH

utils/subtitler.py CHANGED Viewed

@@ -19,6 +19,12 @@ def parse_srt(srt_file):
             i += 1
     return subtitles
 def subtitler(video_file:str,
             srt_path:str,
@@ -26,23 +32,25 @@ def subtitler(video_file:str,
             fontsize:int,
             font: str,
             bg_color:str,
-            text_color:str
             ):
     """Add subtitles from an SRT file to a video."""
     video_file = os.path.abspath(video_file)
     srt_path = os.path.abspath(srt_path)
     output_file = os.path.abspath(output_file)
-    clip = VideoFileClip(filename=video_file, target_resolution=None, resize_algorithm='bitexact')
     subtitles = parse_srt(srt_path)
     subtitle_clips = []
     for start, end, text in subtitles:
         # Create TextClip with specified styling
         txt_clip = TextClip(text, fontsize=fontsize, color=text_color, font=font, method='caption',
-                            bg_color=bg_color, align='center', size=(clip.w*1/2, None))
         txt_clip = txt_clip.set_position(('center', 'bottom')).set_duration(clip.duration).set_start(start).set_end(end)
         subtitle_x_position = 'center'
         subtitle_y_position = clip.h * 4 / 5
         text_position = (subtitle_x_position, subtitle_y_position)
         subtitle_clips.append(txt_clip.set_position(text_position))
-    video = CompositeVideoClip(size=(clip.h,clip.w), clips=[clip] + subtitle_clips)
     video.write_videofile(output_file, codec='libx264', audio_codec='aac')

             i += 1
     return subtitles
def filter_caption_width(caption_width: str = 'desktop'):
    """Translate a caption-width preset into a fraction of the video width.

    The result is multiplied by the clip width when sizing each subtitle
    TextClip, so it must always be a number.

    Args:
        caption_width: Preset name, ``'desktop'`` or ``'mobile'``. Matching
            ignores case and surrounding whitespace. A numeric value is
            returned unchanged so callers may pass an explicit fraction.

    Returns:
        ``0.2`` for ``'desktop'``, ``0.5`` for ``'mobile'``, the value itself
        when it is already a number, and the desktop fraction for anything
        else (unknown preset names, ``None``, other types).
    """
    presets = {'desktop': 0.2, 'mobile': 0.5}
    fallback = presets['desktop']
    if isinstance(caption_width, str):
        # Unknown names fall back to the default instead of leaking a string
        # into the width arithmetic performed by the caller.
        return presets.get(caption_width.strip().lower(), fallback)
    # bool is a subclass of int; treat it as invalid rather than as 0/1.
    if isinstance(caption_width, (int, float)) and not isinstance(caption_width, bool):
        return caption_width
    return fallback
 def subtitler(video_file:str,
             srt_path:str,
             fontsize:int,
             font: str,
             bg_color:str,
+            text_color:str,
+            caption_width:str
             ):
     """Add subtitles from an SRT file to a video."""
     video_file = os.path.abspath(video_file)
     srt_path = os.path.abspath(srt_path)
     output_file = os.path.abspath(output_file)
+    clip = VideoFileClip(filename=video_file, target_resolution=None)
     subtitles = parse_srt(srt_path)
     subtitle_clips = []
     for start, end, text in subtitles:
         # Create TextClip with specified styling
+        # To get a list of possible color and font values run: print(TextClip.list("font"), '\n\n', TextClip.list("color"))
         txt_clip = TextClip(text, fontsize=fontsize, color=text_color, font=font, method='caption',
+                            bg_color=bg_color, align='center', size=(clip.w*filter_caption_width(caption_width), None))
         txt_clip = txt_clip.set_position(('center', 'bottom')).set_duration(clip.duration).set_start(start).set_end(end)
         subtitle_x_position = 'center'
         subtitle_y_position = clip.h * 4 / 5
         text_position = (subtitle_x_position, subtitle_y_position)
         subtitle_clips.append(txt_clip.set_position(text_position))
+    video = CompositeVideoClip(size=None, clips=[clip] + subtitle_clips)
     video.write_videofile(output_file, codec='libx264', audio_codec='aac')

utils/transcriber.py CHANGED Viewed

@@ -14,12 +14,9 @@ def convert_seconds_to_time(seconds):
     hours, remainder = divmod(seconds, 3600)
     minutes, remainder = divmod(remainder, 60)
     whole_seconds = int(remainder)
-    milliseconds = int((remainder - whole_seconds) * 1000)
-    # Format the time string
-    time_string = f"{int(hours):02}:{int(minutes):02}:{whole_seconds:02},{milliseconds:03}"
-    return time_string
 def write_srt(segments, srt_path, max_words_per_line):
     """Write segments to an SRT file with a maximum number of words per line."""

     hours, remainder = divmod(seconds, 3600)
     minutes, remainder = divmod(remainder, 60)
     whole_seconds = int(remainder)
+    milliseconds = int((remainder - whole_seconds) * 1000)
+    return f"{int(hours):02}:{int(minutes):02}:{whole_seconds:02},{milliseconds:03}"
 def write_srt(segments, srt_path, max_words_per_line):
     """Write segments to an SRT file with a maximum number of words per line."""