Spaces:

pszemraj
/

document-summarization

Running on CPU Upgrade

App Files Files Community

pszemraj committed on Apr 30, 2023

Commit

bd3ba15

1 Parent(s): e9ed1f2

🔊 update logs

Browse files

Signed-off-by: peter szemraj <[email protected]>

Files changed (1) hide show

app.py +14 -10

app.py CHANGED Viewed

@@ -19,7 +19,8 @@ os.environ[
 ] = "false"  # parallelism on tokenizers is buggy with gradio
 logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
 import gradio as gr
@@ -232,18 +233,20 @@ def load_uploaded_file(file_obj, max_pages: int = 20, lower: bool = False) -> st
     :param bool lower: whether to lowercase the text
     :return str: the text of the file
     """
     # check if mysterious file object is a list
     if isinstance(file_obj, list):
         file_obj = file_obj[0]
     file_path = Path(file_obj.name)
     try:
-        logging.info(f"Loading file:\t{file_path}")
         if file_path.suffix == ".txt":
             with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                 raw_text = f.read()
             text = clean(raw_text, lower=lower)
         elif file_path.suffix == ".pdf":
-            logging.info(f"loading as PDF file {file_path}")
             conversion_stats = convert_PDF_to_Text(
                 file_path,
                 ocr_model=ocr_model,
@@ -251,18 +254,19 @@ def load_uploaded_file(file_obj, max_pages: int = 20, lower: bool = False) -> st
             )
             text = conversion_stats["converted_text"]
         else:
-            logging.error(f"Unknown file type {file_path.suffix}")
             text = "ERROR - check file - unknown file type"
         return text
     except Exception as e:
-        logging.error(f"Trying to load file:\t{file_path},\nerror:\t{e}")
         return "Error: Could not read file. Ensure that it is a valid text file with encoding UTF-8 if text, and a PDF if PDF."
 if __name__ == "__main__":
-    logging.info("Starting app instance")
-    logging.info("Loading OCR model")
     with contextlib.redirect_stdout(None):
         ocr_model = ocr_predictor(
             "db_resnet50",
@@ -271,7 +275,7 @@ if __name__ == "__main__":
             assume_straight_pages=True,
         )
     name_to_path = load_example_filenames(_here / "examples")
-    logging.info(f"Loaded {len(name_to_path)} examples")
     demo = gr.Blocks()
     _examples = list(name_to_path.keys())
     with demo:
@@ -355,7 +359,7 @@ if __name__ == "__main__":
                     minimum=0.5,
                     maximum=1.0,
                     label="length penalty",
-                    default=0.7,
                     step=0.05,
                 )
                 token_batch_length = gr.Radio(
@@ -369,7 +373,7 @@ if __name__ == "__main__":
                     minimum=1.0,
                     maximum=5.0,
                     label="repetition penalty",
-                    default=3.5,
                     step=0.1,
                 )
                 no_repeat_ngram_size = gr.Radio(

 ] = "false"  # parallelism on tokenizers is buggy with gradio
 logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
 )
 import gradio as gr
     :param bool lower: whether to lowercase the text
     :return str: the text of the file
     """
+    logger = logging.getLogger(__name__)
     # check if mysterious file object is a list
     if isinstance(file_obj, list):
         file_obj = file_obj[0]
     file_path = Path(file_obj.name)
     try:
+        logger.info(f"Loading file:\t{file_path}")
         if file_path.suffix == ".txt":
             with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                 raw_text = f.read()
             text = clean(raw_text, lower=lower)
         elif file_path.suffix == ".pdf":
+            logger.info(f"loading as PDF file {file_path}")
             conversion_stats = convert_PDF_to_Text(
                 file_path,
                 ocr_model=ocr_model,
             )
             text = conversion_stats["converted_text"]
         else:
+            logger.error(f"Unknown file type {file_path.suffix}")
             text = "ERROR - check file - unknown file type"
         return text
     except Exception as e:
+        logger.error(f"Trying to load file:\t{file_path},\nerror:\t{e}")
         return "Error: Could not read file. Ensure that it is a valid text file with encoding UTF-8 if text, and a PDF if PDF."
 if __name__ == "__main__":
+    logger = logging.getLogger(__name__)
+    logger.info("Starting app instance")
+    logger.info("Loading OCR model")
     with contextlib.redirect_stdout(None):
         ocr_model = ocr_predictor(
             "db_resnet50",
             assume_straight_pages=True,
         )
     name_to_path = load_example_filenames(_here / "examples")
+    logger.info(f"Loaded {len(name_to_path)} examples")
     demo = gr.Blocks()
     _examples = list(name_to_path.keys())
     with demo:
                     minimum=0.5,
                     maximum=1.0,
                     label="length penalty",
+                    value=0.7,
                     step=0.05,
                 )
                 token_batch_length = gr.Radio(
                     minimum=1.0,
                     maximum=5.0,
                     label="repetition penalty",
+                    value=1.5,
                     step=0.1,
                 )
                 no_repeat_ngram_size = gr.Radio(