Spaces:

pszemraj
/

document-summarization

Running on CPU Upgrade

App Files Files Community

pszemraj committed on Oct 5, 2022

Commit

c13ffb4

1 Parent(s): 665f924

💄

Browse files

Signed-off-by: peter szemraj <[email protected]>

Files changed (1) hide show

app.py +33 -34

app.py CHANGED Viewed

@@ -72,7 +72,7 @@ def proc_submission(
         # create elaborate HTML warning
         input_wc = re.split(r"\s+", input_text)
         msg = f"""
-        <div style="background-color: #f44336; color: white; padding: 20px;">
         <h3>Warning</h3>
         <p>Input text was truncated to {max_input_length} words. This is about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
         </div>
@@ -104,7 +104,7 @@ def proc_submission(
     html = ""
     html += f"<p>Runtime: {rt} minutes on CPU</p>"
     if msg is not None:
-        html += f"<h2>WARNING:</h2><hr><b>{msg}</b><br><br>"
     html += ""
@@ -225,36 +225,7 @@ if __name__ == "__main__":
                     label="Beam Search: # of Beams",
                     value=2,
                 )
-            gr.Markdown(
-                "_The base model is less performant than the large model, but is faster and will accept up to 2048 words per input (Large model accepts up to 768)._"
-            )
-            with gr.Row():
-                length_penalty = gr.inputs.Slider(
-                    minimum=0.5,
-                    maximum=1.0,
-                    label="length penalty",
-                    default=0.7,
-                    step=0.05,
-                )
-                token_batch_length = gr.Radio(
-                    choices=[512, 768, 1024, 1536],
-                    label="token batch length",
-                    value=1024,
-                )
-            with gr.Row():
-                repetition_penalty = gr.inputs.Slider(
-                    minimum=1.0,
-                    maximum=5.0,
-                    label="repetition penalty",
-                    default=3.5,
-                    step=0.1,
-                )
-                no_repeat_ngram_size = gr.Radio(
-                    choices=[2, 3, 4],
-                    label="no repeat ngram size",
-                    value=3,
-                )
             with gr.Row():
                 example_name = gr.Dropdown(
                     list(name_to_path.keys()),
@@ -268,10 +239,10 @@ if __name__ == "__main__":
                 label="Input Text (for summarization)",
                 placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
             )
-            gr.Markdown("Upload your own file:")
             with gr.Row():
                 uploaded_file = gr.File(
-                    label="Upload a text file",
                     file_count="single",
                     type="file",
                 )
@@ -302,9 +273,37 @@ if __name__ == "__main__":
             )
             gr.Markdown("---")
         with gr.Column():
-            gr.Markdown("## About the Model")
             gr.Markdown(
                 "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
             )

         # create elaborate HTML warning
         input_wc = re.split(r"\s+", input_text)
         msg = f"""
+        <div style="background-color: #FFA500; color: white; padding: 20px;">
         <h3>Warning</h3>
         <p>Input text was truncated to {max_input_length} words. This is about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
         </div>
     html = ""
     html += f"<p>Runtime: {rt} minutes on CPU</p>"
     if msg is not None:
+        html += msg
     html += ""
                     label="Beam Search: # of Beams",
                     value=2,
                 )
             with gr.Row():
                 example_name = gr.Dropdown(
                     list(name_to_path.keys()),
                 label="Input Text (for summarization)",
                 placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
             )
+            gr.Markdown("Upload a file (`.txt` or `.pdf`)")
             with gr.Row():
                 uploaded_file = gr.File(
+                    label="Upload file",
                     file_count="single",
                     type="file",
                 )
             )
             gr.Markdown("---")
+        with gr.Column():
+            gr.Markdown("### Advanced Settings")
+            with gr.Row():
+                length_penalty = gr.inputs.Slider(
+                    minimum=0.5,
+                    maximum=1.0,
+                    label="length penalty",
+                    default=0.7,
+                    step=0.05,
+                )
+                token_batch_length = gr.Radio(
+                    choices=[512, 768, 1024, 1536],
+                    label="token batch length",
+                    value=1024,
+                )
+            with gr.Row():
+                repetition_penalty = gr.inputs.Slider(
+                    minimum=1.0,
+                    maximum=5.0,
+                    label="repetition penalty",
+                    default=3.5,
+                    step=0.1,
+                )
+                no_repeat_ngram_size = gr.Radio(
+                    choices=[2, 3, 4],
+                    label="no repeat ngram size",
+                    value=3,
+                )
         with gr.Column():
+            gr.Markdown("### About the Model")
             gr.Markdown(
                 "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
             )