Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
✨ basic CLI
Browse filesSigned-off-by: peter szemraj <[email protected]>
app.py
CHANGED
|
@@ -11,6 +11,7 @@ Optional Environment Variables:
|
|
| 11 |
APP_MAX_WORDS (int): the maximum number of words to use for summarization
|
| 12 |
APP_OCR_MAX_PAGES (int): the maximum number of pages to use for OCR
|
| 13 |
"""
|
|
|
|
| 14 |
import contextlib
|
| 15 |
import gc
|
| 16 |
import logging
|
|
@@ -72,13 +73,15 @@ aggregator = BatchAggregator("MBZUAI/LaMini-Flan-T5-783M")
|
|
| 72 |
def aggregate_text(
|
| 73 |
summary_text: str,
|
| 74 |
text_file: gr.inputs.File = None,
|
| 75 |
-
):
|
| 76 |
"""
|
| 77 |
Aggregate the text from the batches.
|
| 78 |
|
| 79 |
NOTE: you should probably include passing the BatchAggregator object as a parameter if using this code
|
| 80 |
outside of this file.
|
| 81 |
:param batches_html: The batches to aggregate, in html format
|
|
|
|
|
|
|
| 82 |
"""
|
| 83 |
if summary_text is None or summary_text == SUMMARY_PLACEHOLDER:
|
| 84 |
logging.error("No text provided. Make sure a summary has been generated first.")
|
|
@@ -292,7 +295,7 @@ def load_single_example_text(
|
|
| 292 |
:param int max_pages: the maximum number of pages to load from a PDF
|
| 293 |
:return str: the text of the example
|
| 294 |
"""
|
| 295 |
-
global name_to_path
|
| 296 |
full_ex_path = name_to_path[example_path]
|
| 297 |
full_ex_path = Path(full_ex_path)
|
| 298 |
if full_ex_path.suffix in [".txt", ".md"]:
|
|
@@ -325,7 +328,7 @@ def load_uploaded_file(file_obj, max_pages: int = 20, lower: bool = False) -> st
|
|
| 325 |
:param bool lower: whether to lowercase the text
|
| 326 |
:return str: the text of the file
|
| 327 |
"""
|
| 328 |
-
|
| 329 |
logger = logging.getLogger(__name__)
|
| 330 |
# check if mysterious file object is a list
|
| 331 |
if isinstance(file_obj, list):
|
|
@@ -357,8 +360,44 @@ def load_uploaded_file(file_obj, max_pages: int = 20, lower: bool = False) -> st
|
|
| 357 |
return "Error: Could not read file. Ensure that it is a valid text file with encoding UTF-8 if text, and a PDF if PDF."
|
| 358 |
|
| 359 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
if __name__ == "__main__":
|
|
|
|
| 361 |
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
logger.info("Starting app instance")
|
| 363 |
logger.info("Loading OCR model")
|
| 364 |
with contextlib.redirect_stdout(None):
|
|
@@ -538,4 +577,4 @@ if __name__ == "__main__":
|
|
| 538 |
inputs=[summary_text, text_file],
|
| 539 |
outputs=[aggregated_summary],
|
| 540 |
)
|
| 541 |
-
demo.launch(enable_queue=True, share=
|
|
|
|
| 11 |
APP_MAX_WORDS (int): the maximum number of words to use for summarization
|
| 12 |
APP_OCR_MAX_PAGES (int): the maximum number of pages to use for OCR
|
| 13 |
"""
|
| 14 |
+
import argparse
|
| 15 |
import contextlib
|
| 16 |
import gc
|
| 17 |
import logging
|
|
|
|
| 73 |
def aggregate_text(
|
| 74 |
summary_text: str,
|
| 75 |
text_file: gr.inputs.File = None,
|
| 76 |
+
) -> str:
|
| 77 |
"""
|
| 78 |
Aggregate the text from the batches.
|
| 79 |
|
| 80 |
NOTE: you should probably include passing the BatchAggregator object as a parameter if using this code
|
| 81 |
outside of this file.
|
| 82 |
:param batches_html: The batches to aggregate, in html format
|
| 83 |
+
:param text_file: The text file to append the aggregate summary to
|
| 84 |
+
:return: The aggregate summary in html format
|
| 85 |
"""
|
| 86 |
if summary_text is None or summary_text == SUMMARY_PLACEHOLDER:
|
| 87 |
logging.error("No text provided. Make sure a summary has been generated first.")
|
|
|
|
| 295 |
:param int max_pages: the maximum number of pages to load from a PDF
|
| 296 |
:return str: the text of the example
|
| 297 |
"""
|
| 298 |
+
global name_to_path, ocr_model
|
| 299 |
full_ex_path = name_to_path[example_path]
|
| 300 |
full_ex_path = Path(full_ex_path)
|
| 301 |
if full_ex_path.suffix in [".txt", ".md"]:
|
|
|
|
| 328 |
:param bool lower: whether to lowercase the text
|
| 329 |
:return str: the text of the file
|
| 330 |
"""
|
| 331 |
+
global ocr_model
|
| 332 |
logger = logging.getLogger(__name__)
|
| 333 |
# check if mysterious file object is a list
|
| 334 |
if isinstance(file_obj, list):
|
|
|
|
| 360 |
return "Error: Could not read file. Ensure that it is a valid text file with encoding UTF-8 if text, and a PDF if PDF."
|
| 361 |
|
| 362 |
|
| 363 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments for the summarization demo app.

    :return argparse.Namespace: parsed args with attributes
        ``share`` (bool) - create a public Gradio link,
        ``model`` (str or None) - extra model name to append to MODEL_OPTIONS,
        ``log_level`` (str) - one of DEBUG/INFO/WARNING/ERROR
    """
    parser = argparse.ArgumentParser(
        description="Document Summarization with Long-Document Transformers",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--share",
        # dest defaults to "share" from the long option; no need to spell it out
        action="store_true",
        help="Create a public link to share",
    )
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        default=None,
        help=f"Add a custom model to the list of models: {', '.join(MODEL_OPTIONS)}",
    )
    parser.add_argument(
        "-level",
        "--log-level",
        type=str,
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Set the logging level",
    )
    return parser.parse_args()
|
| 390 |
+
|
| 391 |
+
|
| 392 |
if __name__ == "__main__":
|
| 393 |
+
"""main - the main function of the app"""
|
| 394 |
logger = logging.getLogger(__name__)
|
| 395 |
+
args = parse_args()
|
| 396 |
+
logger.setLevel(args.log_level)
|
| 397 |
+
logger.info(f"args: {args}")
|
| 398 |
+
if args.model is not None:
|
| 399 |
+
logger.info(f"Adding model {args.model} to the list of models")
|
| 400 |
+
MODEL_OPTIONS.append(args.model)
|
| 401 |
logger.info("Starting app instance")
|
| 402 |
logger.info("Loading OCR model")
|
| 403 |
with contextlib.redirect_stdout(None):
|
|
|
|
| 577 |
inputs=[summary_text, text_file],
|
| 578 |
outputs=[aggregated_summary],
|
| 579 |
)
|
| 580 |
+
demo.launch(enable_queue=True, share=args.share)
|