✨ upgrade aggregation model
Signed-off-by: peter szemraj <[email protected]>
- aggregate.py +30 -79
- app.py +7 -5
aggregate.py
CHANGED
@@ -1,12 +1,10 @@
 """
-aggregate.py - module for
-Primary usage is through the BatchAggregator class.
+aggregate.py - module for 'reducing' multiple 'summary chunks' into one
 
-
-
-2. The language model does it.
-3. Yaay!
+an overly complicated class for legacy compatibility reasons, for usage of the
+2024 map-reduce models see hf.co/pszemraj/bart-large-summary-map-reduce#usage
 """
+
 import logging
 import pprint as pp
 import time
@@ -14,8 +12,6 @@ import time
 import torch
 from transformers import GenerationConfig, pipeline
 
-from utils import compare_model_size
-
 # Setting up logging
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -27,42 +23,30 @@ class BatchAggregator:
     BatchAggregator is a class for aggregating text from multiple sources.
 
     Usage:
-
-
-
-
+        from aggregate import BatchAggregator
+        aggregator = BatchAggregator()
+        agg = aggregator.infer_aggregate(["This is a test", "This is another test"])
+        print(agg)
     """
 
     GENERIC_CONFIG = GenerationConfig(
-
+        max_new_tokens=512,
+        num_beams=4,
         early_stopping=True,
         do_sample=False,
-
-        max_new_tokens=256,
-        repetition_penalty=1.1,
-        length_penalty=1.4,
-        no_repeat_ngram_size=4,
-        encoder_no_repeat_ngram_size=5,
+        truncation=True,
     )
-    CONFIGURED_MODELS = [
-        "pszemraj/bart-large-mnli-dolly_hhrlhf-v1",
-        "pszemraj/bart-base-instruct-dolly_hhrlhf",
-        "pszemraj/flan-t5-large-instruct-dolly_hhrlhf",
-        "pszemraj/flan-t5-base-instruct-dolly_hhrlhf",
-    ] # these have generation configs defined for this task in their model repos
-
-    DEFAULT_INSTRUCTION = "Write a comprehensive yet concise summary that pulls together the main points of the following text:"
 
     def __init__(
         self,
-        model_name: str = "pszemraj/bart-large-
+        model_name: str = "pszemraj/bart-large-summary-map-reduce",
        force_cpu: bool = False,
         **kwargs,
     ):
         """
         __init__ initializes the BatchAggregator class.
 
-        :param str model_name: model name to use, default: "pszemraj/bart-large-
+        :param str model_name: model name to use, default: "pszemraj/bart-large-summary-map-reduce"
         :param bool force_cpu: force the model to run on CPU, default: False
         """
         self.device = None
@@ -87,40 +71,29 @@ class BatchAggregator:
         self.model_name = model_name
         self.aggregator = self._create_pipeline(model_name)
         self._configure_model()
-        # update the generation config with the specific tokenizer
-        tokenizer_params = {
-            "decoder_start_token_id": 0
-            if "t5" in model_name.lower()
-            else self.aggregator.tokenizer.eos_token_id,
-            "eos_token_id": 1
-            if "t5" in model_name.lower()
-            else self.aggregator.tokenizer.eos_token_id,
-            "pad_token_id": 0
-            if "t5" in model_name.lower()
-            else self.aggregator.tokenizer.pad_token_id,
-        }
-        self.update_generation_config(**tokenizer_params)
 
     def _create_pipeline(
-        self, model_name: str = "pszemraj/bart-large-
+        self, model_name: str = "pszemraj/bart-large-summary-map-reduce"
     ) -> pipeline:
         """
         _create_pipeline creates a pipeline for the model.
 
-        :param str model_name: model name to use
+        :param str model_name: model name to use
         :return pipeline: the pipeline for the model
 
         :raises Exception: if the pipeline cannot be created
         """
-
+        device_map = (
+            "auto" if torch.cuda.is_available() and not self.force_cpu else "cpu"
+        )
         try:
             self.logger.info(
-                f"Creating pipeline with model {model_name} on device {self.device}"
+                f"Creating pipeline with model {model_name} on device {device_map}"
             )
             return pipeline(
                 "text2text-generation",
                 model=model_name,
-
+                device_map=device_map,
                 torch_dtype=torch.float32,
             )
         except Exception as e:
@@ -137,36 +110,16 @@ class BatchAggregator:
         except Exception as e:
             self.logger.warning(f"Could not compile model with Torch 2.0: {e}")
 
-        if self.model_name not in self.CONFIGURED_MODELS:
-            self.logger.info("Setting generation config to general defaults")
-            self._set_default_generation_config()
-        else:
-            try:
-                self.logger.info("Loading generation config from hub")
-                self.aggregator.model.generation_config = (
-                    GenerationConfig.from_pretrained(self.model_name)
-                )
-            except Exception as e:
-                self.logger.warning(
-                    f"Could not load generation config, using defaults: {e}"
-                )
-                self._set_default_generation_config()
-
+        self._set_default_generation_config()
         self.logger.info(self.aggregator.model.generation_config.to_json_string())
 
     def _set_default_generation_config(self):
         """
         Set the default generation configuration for the model.
         """
-        self.aggregator.model.generation_config = self.GENERIC_CONFIG
-
-        if (
-            "large" in self.model_name.lower()
-            or "xl" in self.model_name.lower()
-            or compare_model_size(self.model_name, 500)
-        ):
-            upd = {"num_beams": 4}
-            self.update_generation_config(**upd)
+        self.aggregator.model.generation_config.update(
+            **self.GENERIC_CONFIG.to_diff_dict()
+        )
 
     def update_generation_config(self, **kwargs):
         """
@@ -176,7 +129,6 @@ class BatchAggregator:
         **kwargs: The parameters to update in the generation configuration.
         """
         self.logger.info(f"Updating generation config with {pp.pformat(kwargs)}")
-
         self.aggregator.model.generation_config.update(**kwargs)
 
     def get_generation_config(self) -> dict:
@@ -200,33 +152,32 @@ class BatchAggregator:
     def infer_aggregate(
         self,
         text_list: list,
-        instruction: str = DEFAULT_INSTRUCTION,
+        instruction: str = None,  # Kept for backward compatibility but not used
         **kwargs,
     ) -> str:
-        """
+        """
         infer_aggregate - infers a consolidated summary from a list of texts.
 
         Args:
             text_list (list): The texts to summarize.
-            instruction (str):
+            instruction (str): Not used by this model, kept for compatibility.
             **kwargs: Additional parameters to update in the generation configuration.
 
         Returns:
             The generated summary.
         """
-        joined_text = "\n".join(text_list)
-        prompt = f"{instruction}\n\n{joined_text}\n"
+        joined_text = "\n\n".join(text_list)
         if kwargs:
             self.update_generation_config(**kwargs)
         st = time.perf_counter()
         self.logger.info(f"inference on {len(text_list)} texts ...")
         result = self.aggregator(
-            prompt,
+            joined_text,
             generation_config=self.aggregator.model.generation_config,
         )[0]["generated_text"]
         self.logger.info(f"Done. runtime:\t{round(time.perf_counter() - st, 2)}s")
         self.logger.info(
-            f"Input tokens:\t{self.count_tokens(prompt)}. Output tokens:\t{self.count_tokens(result)}"
+            f"Input tokens:\t{self.count_tokens(joined_text)}. Output tokens:\t{self.count_tokens(result)}"
         )
         self.logger.debug(f"Generated text:\n{result}")
 
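For context, a minimal runnable sketch of the new code path: the model name, device_map logic, "\n\n" join, and generation settings are taken from this diff, while the example chunks are placeholder text.

import torch
from transformers import pipeline

# mirrors _create_pipeline: text2text pipeline, GPU if available
aggregator = pipeline(
    "text2text-generation",
    model="pszemraj/bart-large-summary-map-reduce",
    device_map="auto" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.float32,
)

# mirrors infer_aggregate: join summary chunks with blank lines, no instruction prefix
summary_chunks = ["This is a test", "This is another test"]
joined_text = "\n\n".join(summary_chunks)
result = aggregator(joined_text, truncation=True, max_new_tokens=512, num_beams=4)[0]["generated_text"]
print(result)

Unlike the removed instruction-tuned checkpoints, the map-reduce model consumes the concatenated chunks directly, which is why DEFAULT_INSTRUCTION and the prompt construction are gone.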
app.py
CHANGED
@@ -14,6 +14,7 @@ Optional Environment Variables:
     APP_MAX_WORDS (int): the maximum number of words to use for summarization
     APP_OCR_MAX_PAGES (int): the maximum number of pages to use for OCR
 """
+
 import argparse
 import contextlib
 import gc
@@ -77,7 +78,7 @@ TOKEN_BATCH_OPTIONS = [
 ] # token batch sizes users can choose from
 
 SUMMARY_PLACEHOLDER = "<p><em>Output will appear below:</em></p>"
-AGGREGATE_MODEL = "
+AGGREGATE_MODEL = "pszemraj/bart-large-summary-map-reduce"  # map-reduce model
 
 # if duplicating space: uncomment this line to adjust the max words
 # os.environ["APP_MAX_WORDS"] = str(2048) # set the max words to 2048
@@ -488,7 +489,7 @@ if __name__ == "__main__":
     with demo:
         gr.Markdown(
             """# Document Summarization with Long-Document Transformers
-
+
             An example use case for fine-tuned long document transformers. Model(s) are trained on [book summaries](https://hf.co/datasets/kmfoda/booksum). Architectures [in this demo](https://hf.co/spaces/pszemraj/document-summarization) are [LongT5-base](https://hf.co/pszemraj/long-t5-tglobal-base-16384-book-summary) and [Pegasus-X-Large](https://hf.co/pszemraj/pegasus-x-large-book-summary).
 
             **Want more performance?** Run this demo from a free [Google Colab GPU](https://colab.research.google.com/gist/pszemraj/52f67cf7326e780155812a6a1f9bb724/document-summarization-on-gpu.ipynb)
@@ -497,7 +498,7 @@ if __name__ == "__main__":
         with gr.Column():
             gr.Markdown(
                 """## Load Inputs & Select Parameters
-
+
                 Enter/paste text below, or upload a file. Pick a model & adjust params (_optional_), and press **Summarize!**
 
                 See [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for details.
@@ -596,8 +597,9 @@ if __name__ == "__main__":
             )
 
         with gr.Column():
-            gr.Markdown(
-
+            gr.Markdown(
+                """### Advanced Settings
+
                 Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_.
                 """
             )
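The only functional change in app.py is pointing AGGREGATE_MODEL at the new checkpoint. A hedged sketch of how the constant feeds the class above; the aggregate_summaries helper is illustrative, since app.py's actual call sites fall outside this diff.

from aggregate import BatchAggregator

AGGREGATE_MODEL = "pszemraj/bart-large-summary-map-reduce"  # map-reduce model

# hypothetical helper, not part of this commit
def aggregate_summaries(summary_chunks: list) -> str:
    """Reduce per-batch summaries into one consolidated summary."""
    aggregator = BatchAggregator(AGGREGATE_MODEL)
    return aggregator.infer_aggregate(summary_chunks)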