Commit db3eaec · Parent: 39f4270
Fixes to the representation model, visualisations, and embeddings in CPU mode; package updates and optimisations for compatibility.
Changed files:
- Dockerfile +2 -5
- app.py +6 -6
- funcs/bertopic_vis_documents.py +81 -37
- funcs/embeddings.py +4 -0
- funcs/prompts.py +5 -5
- funcs/representation_model.py +236 -2
- requirements.txt +2 -2
- requirements_aws.txt +2 -2
- requirements_gpu.txt +1 -1
Dockerfile
CHANGED
@@ -1,5 +1,5 @@
 # Stage 1: Build dependencies and download models
-FROM public.ecr.aws/docker/library/python:3.11
+FROM public.ecr.aws/docker/library/python:3.12.11-slim-trixie AS builder

 # Install Lambda web adapter in case you want to run with an AWS Lambda function URL (not essential if not using Lambda)
 #COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.4 /lambda-adapter /opt/extensions/lambda-adapter
@@ -31,7 +31,7 @@ RUN python /src/download_model.py
 RUN rm requirements_aws.txt download_model.py

 # Stage 2: Final runtime image
-FROM public.ecr.aws/docker/library/python:3.
+FROM public.ecr.aws/docker/library/python:3.12.1-slim-trixie

 # Create a non-root user
 RUN useradd -m -u 1000 user
@@ -43,9 +43,6 @@ COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
 RUN mkdir -p /home/user/app/output /home/user/.cache/huggingface/hub /home/user/.cache/matplotlib /home/user/app/cache \
     && chown -R user:user /home/user

-# Download the quantised phi model directly with curl. Changed as it is so big - not loaded
-#RUN curl -L -o /home/user/app/model/rep/Llama-3.2-3B-Instruct-Q5_K_M.gguf https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/tree/main/Llama-3.2-3B-Instruct-Q5_K_M.gguf
-
 # Copy models from the builder stage
 COPY --from=builder /model/rep /home/user/app/model/rep
 COPY --from=builder /model/embed /home/user/app/model/embed
app.py
CHANGED
@@ -27,7 +27,7 @@ usage_logs_folder = 'usage/' + today_rev + '/' + host_name + '/'

 # Gradio app

-app = gr.Blocks(theme
+app = gr.Blocks(theme=gr.themes.Default(primary_hue="blue"), fill_width = True)

 with app:

@@ -77,14 +77,14 @@ with app:
 in_colnames = gr.Dropdown(choices=["Choose a column"], multiselect = True, label="Select column to find topics (first will be chosen if multiple selected).")

 with gr.Accordion("Clean data", open = False):
-with gr.Row():
+with gr.Row(equal_height = True):
 clean_text = gr.Dropdown(value = "No", choices=["Yes", "No"], multiselect=False, label="Remove html, URLs, non-ASCII, large numbers, emails, postcodes (UK).")
 drop_duplicate_text = gr.Dropdown(value = "No", choices=["Yes", "No"], multiselect=False, label="Remove duplicate text, drop < 50 character strings.")
 anonymise_drop = gr.Dropdown(value = "No", choices=["Yes", "No"], multiselect=False, label="Redact personal information - not 100% effective and slow!")
 #with gr.Row():
 split_sentence_drop = gr.Dropdown(value = "No", choices=["Yes", "No"], multiselect=False, label="Split text into sentences. Useful for small datasets.")
 #additional_custom_delimiters_drop = gr.Dropdown(choices=["and", ",", "as well as", "also"], multiselect=True, label="Additional custom delimiters to split sentences.")
-min_sentence_length_num = gr.Number(value=5, label="
+min_sentence_length_num = gr.Number(value=5, label="Minimum character length of split sentences")

 with gr.Row():
 custom_regex = gr.UploadButton(label="Import custom regex removal file", file_count="multiple")
@@ -115,11 +115,11 @@ with app:
 topics_btn = gr.Button("Extract topics", variant="primary")

 with gr.Row():
-output_single_text = gr.Textbox(label="Output topics")
+output_single_text = gr.Textbox(label="Output topics", lines = 5)
 output_file = gr.File(label="Output file")

 with gr.Accordion("Post processing options.", open = True):
-with gr.Row():
+with gr.Row(equal_height = True):
 representation_type = gr.Dropdown(label = "Method for generating new topic labels", value="Default", choices=["Default", "MMR", "KeyBERT", "LLM"])
 represent_llm_btn = gr.Button("Change topic labels")
 with gr.Row():
@@ -135,7 +135,7 @@ with app:

 plot_btn = gr.Button("Visualise topic model")
 with gr.Row():
-vis_output_single_text = gr.Textbox(label="Visualisation output text")
+vis_output_single_text = gr.Textbox(label="Visualisation output text (if data points don't appear below, download the html output to see them)")
 out_plot_file = gr.File(label="Output plots to file", file_count="multiple")
 plot = gr.Plot(label="Visualise your topics here.")
 plot_2 = gr.Plot(label="Visualise your topics here.")
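For reference, the layout options introduced above (theme, fill_width, equal_height, lines) are standard Gradio Blocks/Row/Textbox parameters. Below is a minimal, self-contained sketch of the same pattern, using illustrative component names rather than the app's real ones (assumes Gradio 5.x):

import gradio as gr

# Blocks-level options: a themed layout that stretches to the full browser width.
demo = gr.Blocks(theme=gr.themes.Default(primary_hue="blue"), fill_width=True)

with demo:
    # equal_height keeps the dropdown and button the same height within the row.
    with gr.Row(equal_height=True):
        example_choice = gr.Dropdown(value="No", choices=["Yes", "No"], label="Example option")
        run_btn = gr.Button("Run")
    # lines=5 gives a taller, multi-line output box, as used for the topic output above.
    example_output = gr.Textbox(label="Output", lines=5)

if __name__ == "__main__":
    demo.launch()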
funcs/bertopic_vis_documents.py
CHANGED
@@ -197,50 +197,94 @@ def visualize_documents_custom(topic_model,
 if len(non_selected_topics) == 0:
 non_selected_topics = [-1]

-selection = df.loc[df.topic.isin(non_selected_topics), :]
-[remaining removed lines of the old "other documents" trace were not captured in this view]
+selection = df.loc[df.topic.isin(non_selected_topics), :].copy()
+if len(selection) > 0:
+    selection["text"] = ""
+    # Only add annotation row if selection is not empty
+    if not hide_annotations:
+        annotation_row = pd.DataFrame({
+            "topic": [None],
+            "doc": [None],
+            "hover_labels": [None],
+            "x": [selection.x.mean()],
+            "y": [selection.y.mean()],
+            "text": ["Other documents"]
+        })
+        selection = pd.concat([selection, annotation_row], ignore_index=True)
+
+    # Filter out rows where x or y is NaN to keep arrays aligned
+    valid_mask = selection.x.notna() & selection.y.notna()
+    selection_valid = selection[valid_mask].copy()
+
+    # Convert to lists to avoid Series issues
+    x_vals = selection_valid.x.tolist()
+    y_vals = selection_valid.y.tolist()
+    hover_vals = selection_valid.hover_labels.tolist() if not hide_document_hover and len(selection_valid) > 0 else None
+    text_vals = selection_valid.text.tolist() if len(selection_valid) > 0 else []
+
+    if len(x_vals) > 0:  # Only add trace if there are valid data points
+        fig.add_trace(
+            go.Scattergl(
+                x=x_vals,
+                y=y_vals,
+                hovertext=hover_vals,
+                hoverinfo="text",
+                mode='markers+text',
+                name="other",
+                showlegend=False,
+                marker=dict(color='#CFD8DC', size=5, opacity=0.5),
+                hoverlabel=dict(align='left'),
+                text=text_vals if len(text_vals) > 0 and any(t for t in text_vals if t) else None
+            )
+        )

 # Selected topics
 for name, topic in zip(names, unique_topics):
 #print(name)
 #print(topic)
 if topic in topics and topic != -1:
-selection = df.loc[df.topic == topic, :]
-[remaining removed lines of the old per-topic trace were not captured in this view]
+selection = df.loc[df.topic == topic, :].copy()
+if len(selection) > 0:
+    selection["text"] = ""
+
+    if not hide_annotations:
+        # Add annotation row properly using DataFrame concat
+        annotation_row = pd.DataFrame({
+            "topic": [None],
+            "doc": [None],
+            "hover_labels": [None],
+            "x": [selection.x.mean()],
+            "y": [selection.y.mean()],
+            "text": [name]
+        })
+        selection = pd.concat([selection, annotation_row], ignore_index=True)
+
+    # Filter out rows where x or y is NaN to keep arrays aligned
+    valid_mask = selection.x.notna() & selection.y.notna()
+    selection_valid = selection[valid_mask].copy()
+
+    # Convert to lists to avoid Series issues
+    x_vals = selection_valid.x.tolist()
+    y_vals = selection_valid.y.tolist()
+    hover_vals = selection_valid.hover_labels.tolist() if not hide_document_hover else None
+    text_vals = selection_valid.text.tolist()
+
+    if len(x_vals) > 0:  # Only add trace if there are valid data points
+        fig.add_trace(
+            go.Scattergl(
+                x=x_vals,
+                y=y_vals,
+                hovertext=hover_vals,
+                hoverinfo="text",
+                text=text_vals if len(text_vals) > 0 and any(t for t in text_vals if t) else None,
+                mode='markers+text',
+                name=name,
+                textfont=dict(
+                    size=12,
+                ),
+                marker=dict(size=5, opacity=0.5),
+                hoverlabel=dict(align='left')
+            ))

 # Add grid in a 'plus' shape
 x_range = (df.x.min() - abs((df.x.min()) * .15), df.x.max() + abs((df.x.max()) * .15))
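The pattern these visualisation changes rely on is: copy the selection, optionally append a centroid annotation row, drop any rows whose x or y is NaN so all per-point lists stay the same length, and only call fig.add_trace when something remains. A minimal sketch of that pattern in isolation (the toy DataFrame and values below are illustrative, not the app's data):

import pandas as pd
import plotly.graph_objects as go

# Toy document map: one point has a missing coordinate, as can happen after a concat.
df = pd.DataFrame({
    "x": [0.1, 0.4, None],
    "y": [0.2, 0.5, 0.9],
    "hover_labels": ["doc one", "doc two", "doc three"],
})
df["text"] = ""

# Keep only rows where both coordinates exist so every per-point list stays aligned.
valid = df[df.x.notna() & df.y.notna()].copy()

fig = go.Figure()
if len(valid) > 0:  # only add a trace when there is something left to plot
    fig.add_trace(
        go.Scattergl(
            x=valid.x.tolist(),
            y=valid.y.tolist(),
            text=valid.text.tolist(),
            hovertext=valid.hover_labels.tolist(),
            hoverinfo="text",
            mode="markers+text",
            marker=dict(size=5, opacity=0.5),
        )
    )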
funcs/embeddings.py
CHANGED
@@ -71,6 +71,10 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
 TruncatedSVD(100, random_state=random_seed)
 )

+# Ensure embeddings_out is a numpy array (handle case where it might be a string from Gradio state)
+if not isinstance(embeddings_out, np.ndarray):
+    embeddings_out = np.array([])
+
 # If no embeddings found, make or load in
 if embeddings_out.size == 0:
 print("Embeddings not found. Loading or generating new ones.")
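This guard matters because a gr.State initialised with a placeholder (for example an empty string) reaches the callback as that placeholder, and calling .size on it would fail. A minimal sketch of the same check, using a hypothetical helper name rather than the function above:

import numpy as np

def ensure_embeddings_array(embeddings_out):
    """Hypothetical helper mirroring the guard above: coerce non-array state to an empty array."""
    if not isinstance(embeddings_out, np.ndarray):
        embeddings_out = np.array([])
    return embeddings_out

# A Gradio State initialised as "" would otherwise break embeddings_out.size
print(ensure_embeddings_array("").size)                # 0 -> triggers the "make or load" branch
print(ensure_embeddings_array(np.zeros((3, 5))).size)  # 15 -> existing embeddings are reused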
funcs/prompts.py
CHANGED
@@ -16,7 +16,7 @@ capybara_example_prompt = """USER:I have a topic that contains the following doc

 The topic is described by the following keywords: 'meat, beef, eat, eating, emissions, steak, food, health, processed, chicken'.

-Based on the information about the topic above, please create a short label of this topic.
+Based on the information about the topic above, please create a short label of this topic. Return only the label and no other text or explanation.

 Topic label: Environmental impacts of eating meat
 """
@@ -54,7 +54,7 @@ I have a topic that contains the following documents:

 The topic is described by the following keywords: 'meat, beef, eat, eating, emissions, steak, food, health, processed, chicken'.

-Based on the information about the topic above, please create a short label of this topic.
+Based on the information about the topic above, please create a short label of this topic. Return only the label and no other text or explanation.

 Topic label: Environmental impacts of eating meat
 """
@@ -83,7 +83,7 @@ I have a topic that contains the following documents:

 The topic is described by the following keywords: 'meat, beef, eat, eating, emissions, steak, food, health, processed, chicken'.

-Based on the information about the topic above, please create a short label of this topic.
+Based on the information about the topic above, please create a short label of this topic. Return only the label and no other text or explanation.

 Topic label: Environmental impacts of eating meat
 """
@@ -115,7 +115,7 @@ I have a topic that contains the following documents:

 The topic is described by the following keywords: 'meat, beef, eat, eating, emissions, steak, food, health, processed, chicken'.

-Based on the information about the topic above, please create a short label of this topic.
+Based on the information about the topic above, please create a short label of this topic. Return only the label and no other text or explanation.

 Topic label: Environmental impacts of eating meat
 """
@@ -129,7 +129,7 @@ I have a topic that contains the following documents:

 The topic is described by the following keywords: '[KEYWORDS]'.

-Based on the information about the topic above, please create a short label of this topic.
+Based on the information about the topic above, please create a short label of this topic. Return only the label and no other text or explanation.<|end|>
 <|assistant|>
 Topic label:"""
funcs/representation_model.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import re
 import spaces
 from bertopic.representation import LlamaCPP

@@ -8,10 +9,227 @@ from huggingface_hub import hf_hub_download
 from gradio import Warning

 from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, BaseRepresentation
-from funcs.embeddings import torch_device
 from funcs.prompts import phi3_prompt, phi3_start
 from funcs.helper_functions import get_or_create_env_var, GPU_SPACE_DURATION

+
+def clean_llm_output_text(text: str) -> str:
+    """
+    Clean LLM output text by removing special characters.
+    Keeps only: letters, numbers, spaces, dashes, and apostrophes (for contractions).
+
+    Args:
+        text: The text to clean
+
+    Returns:
+        Cleaned text with special characters removed
+    """
+    if not text:
+        return ""
+
+    # Keep only alphanumeric characters, spaces, dashes, and apostrophes
+    # This regex keeps: a-z, A-Z, 0-9, spaces, hyphens/dashes, and apostrophes
+    cleaned = re.sub(r'[^a-zA-Z0-9\s\-\']', '', text)
+
+    # Clean up multiple spaces and strip
+    cleaned = re.sub(r'\s+', ' ', cleaned)
+    cleaned = cleaned.strip()
+
+    return cleaned
+
+
+def patch_llama_create_chat_completion(llama_model):
+    """
+    Monkey-patch the create_chat_completion method on a Llama model instance
+    to use raw completion instead of chat format handler.
+    This avoids the "System role not supported" error for models like phi3.
+
+    Args:
+        llama_model: The Llama model instance to patch
+
+    Returns:
+        The same llama_model instance with patched create_chat_completion method
+    """
+    def patched_create_chat_completion(messages, **kwargs):
+        """
+        Override create_chat_completion to use raw completion.
+        This avoids the chat format handler that requires system roles (not supported by phi3).
+        BERTopic's LlamaCPP formats messages and uses the prompt template, so we reconstruct
+        the full prompt from the messages.
+        """
+        # Reconstruct the prompt from messages
+        # BERTopic's LlamaCPP passes messages in OpenAI format: [{"role": "user", "content": "..."}]
+        prompt_parts = []
+        for msg in messages:
+            if isinstance(msg, dict):
+                role = msg.get('role', 'user')
+                content = msg.get('content', '')
+                # Skip system messages as phi3 doesn't support them
+                if role != 'system' and content:
+                    prompt_parts.append(content)
+            else:
+                prompt_parts.append(str(msg))
+
+        # Join all message contents into a single prompt
+        prompt = '\n'.join(prompt_parts) if prompt_parts else ''
+
+        # Use raw completion instead of chat completion
+        # This avoids the chat format handler that requires system roles
+        # Remove chat-specific kwargs that might cause issues, but enable streaming
+        completion_kwargs = {k: v for k, v in kwargs.items()
+                             if k not in ['messages', 'chat_format', 'chat_handler']}
+
+        # Enable streaming to show output in real-time
+        completion_kwargs['stream'] = True
+
+        # Use create_completion for raw text completion (not chat completion)
+        # With stream=True, this returns a generator of CompletionChunk objects
+        text_parts = []
+        try:
+            # Create completion with streaming enabled
+            completion_stream = llama_model.create_completion(prompt, **completion_kwargs)
+
+            # Iterate through the stream and collect text
+            print("\nLLM Output: ", end="", flush=True)  # Print prefix without newline
+            for chunk in completion_stream:
+                # Extract text from each chunk
+                chunk_text = ""
+
+                # Handle dictionary chunks (the format returned by llama_cpp)
+                if isinstance(chunk, dict):
+                    # Extract from chunk['choices'][0]['text'] - this is the standard format
+                    if 'choices' in chunk and len(chunk['choices']) > 0:
+                        choice = chunk['choices'][0]
+                        if isinstance(choice, dict):
+                            chunk_text = choice.get('text', '') or choice.get('content', '')
+                        elif hasattr(choice, 'text'):
+                            chunk_text = choice.text
+                        elif hasattr(choice, 'content'):
+                            chunk_text = choice.content
+                    elif 'text' in chunk:
+                        chunk_text = chunk['text']
+                    elif 'content' in chunk:
+                        chunk_text = chunk['content']
+
+                # Try different ways to extract text from the chunk (object format)
+                elif hasattr(chunk, 'choices') and len(chunk.choices) > 0:
+                    choice = chunk.choices[0]
+                    if hasattr(choice, 'text'):
+                        chunk_text = choice.text
+                    elif hasattr(choice, 'delta') and hasattr(choice.delta, 'content'):
+                        # Some formats use delta.content
+                        chunk_text = choice.delta.content or ""
+                    elif hasattr(choice, 'content'):
+                        chunk_text = choice.content
+                    elif isinstance(choice, dict):
+                        chunk_text = choice.get('text', '') or choice.get('delta', {}).get('content', '')
+                elif hasattr(chunk, 'text'):
+                    chunk_text = chunk.text
+                elif isinstance(chunk, str):
+                    chunk_text = chunk
+                elif hasattr(chunk, '__dict__'):
+                    # Check various possible attributes
+                    chunk_dict = chunk.__dict__
+                    if 'text' in chunk_dict:
+                        chunk_text = chunk_dict['text']
+                    elif 'choices' in chunk_dict:
+                        choices = chunk_dict['choices']
+                        if choices and len(choices) > 0:
+                            if isinstance(choices[0], dict):
+                                chunk_text = choices[0].get('text', '') or choices[0].get('delta', {}).get('content', '')
+                            elif hasattr(choices[0], 'text'):
+                                chunk_text = choices[0].text
+                            elif hasattr(choices[0], 'delta'):
+                                delta = choices[0].delta
+                                if hasattr(delta, 'content'):
+                                    chunk_text = delta.content or ""
+
+                # Only add non-empty text and filter out debug messages
+                if chunk_text and chunk_text.strip():
+                    # Filter out llama.cpp debug messages
+                    if not any(debug_keyword in chunk_text for debug_keyword in [
+                        'llama_perf_context_print', 'Llama.generate', 'load time',
+                        'prompt eval time', 'eval time', 'total time', 'prefix-match hit'
+                    ]):
+                        text_parts.append(chunk_text)
+                        print(chunk_text, end="", flush=True)  # Print without newline, flush immediately
+
+            print()  # Newline after streaming is complete
+            text = ''.join(text_parts)
+
+            # Clean the text to remove special characters
+            text = clean_llm_output_text(text)
+
+            # If no text was collected, there might be an issue with chunk extraction
+            if not text:
+                print("Warning: No text extracted from streaming chunks. Chunk structure may be different.")
+                print("Falling back to non-streaming mode.")
+                raise Exception("No text in stream")
+
+        except (AttributeError, TypeError, Exception) as e:
+            # Fallback to non-streaming if create_completion doesn't exist or streaming fails
+            print(f"\nStreaming failed, falling back to non-streaming mode: {e}")
+            completion_kwargs.pop('stream', None)  # Remove stream parameter
+            try:
+                completion = llama_model.create_completion(prompt, **completion_kwargs)
+            except AttributeError:
+                completion = llama_model(prompt, **completion_kwargs)
+
+            # Extract text from the completion object
+            text = ""
+            if hasattr(completion, 'choices') and len(completion.choices) > 0:
+                # Standard Completion object format
+                if hasattr(completion.choices[0], 'text'):
+                    text = completion.choices[0].text
+                elif hasattr(completion.choices[0], 'content'):
+                    text = completion.choices[0].content
+            elif hasattr(completion, 'text'):
+                # Direct text attribute
+                text = completion.text
+            elif isinstance(completion, str):
+                # Already a string
+                text = completion
+            elif hasattr(completion, '__dict__'):
+                # Try to get text from object attributes
+                if 'text' in completion.__dict__:
+                    text = completion.__dict__['text']
+                elif 'choices' in completion.__dict__:
+                    choices = completion.__dict__['choices']
+                    if choices and len(choices) > 0:
+                        if isinstance(choices[0], dict):
+                            text = choices[0].get('text', '')
+                        elif hasattr(choices[0], 'text'):
+                            text = choices[0].text
+            else:
+                # Last resort: convert to string (but this might not work well)
+                text = str(completion)

+            # Clean up the text - remove special characters and whitespace
+            text = clean_llm_output_text(text) if text else ""
+
+        # Create a chat completion response as a dictionary
+        # BERTopic accesses it as: response["choices"][0]["message"]["content"]
+        # Always return a dictionary to ensure it's subscriptable
+        return {
+            "choices": [{
+                "message": {
+                    "content": text,
+                    "role": "assistant"
+                },
+                "finish_reason": "stop",
+                "index": 0
+            }],
+            "id": "custom",
+            "created": 0,
+            "model": "",
+            "object": "chat.completion"
+        }
+
+    # Replace the method on the instance
+    llama_model.create_chat_completion = patched_create_chat_completion
+
+    return llama_model
+
 chosen_prompt = phi3_prompt #open_hermes_prompt # stablelm_prompt
 chosen_start_tag = phi3_start #open_hermes_start # stablelm_start

@@ -222,7 +440,23 @@ def create_representation_model(representation_type: str, llm_config: dict, hf_m
 print("Loading representation model with", llm_config.n_gpu_layers, "layers allocated to GPU.")

 #llm_config.n_gpu_layers
-[removed line not captured in this view]
+# Initialize Llama model - try to disable chat format handler if supported
+# This helps avoid "System role not supported" error for models like phi3
+try:
+    llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx, seed=seed, chat_format=None)
+except TypeError:
+    # If chat_format parameter doesn't exist, try without it or with chat_handler
+    try:
+        llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx, seed=seed, chat_handler=None)
+    except TypeError:
+        # Fall back to basic initialization if chat format parameters don't exist
+        llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx, seed=seed)
+
+# Monkey-patch the create_chat_completion method to use raw completion
+# This avoids the chat format handler that requires system roles (not supported by phi3)
+# We patch the instance directly so it still passes isinstance checks in BERTopic
+llm = patch_llama_create_chat_completion(llm)
+
 #print(llm.n_gpu_layers)
 #print("Chosen prompt:", chosen_prompt)
 llm_model = LlamaCPP(llm, prompt=chosen_prompt)#, **gen_config.model_dump())
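To show how the new pieces are meant to fit together, here is a minimal, hedged sketch of wiring a patched Llama instance into BERTopic's LlamaCPP representation. The model path, prompt and parameter values are placeholders rather than the app's real configuration, and the imports from funcs.representation_model assume the functions added in this commit:

from llama_cpp import Llama
from bertopic.representation import LlamaCPP

from funcs.representation_model import patch_llama_create_chat_completion, clean_llm_output_text

MODEL_PATH = "model/rep/example-model.gguf"  # placeholder GGUF path
PROMPT = "[DOCUMENTS]\nKeywords: [KEYWORDS]\nTopic label:"  # placeholder prompt template

# Plain llama-cpp-python initialisation; n_gpu_layers=0 keeps everything on CPU.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=0, seed=42)

# Route chat completions through raw completion so phi3-style models never hit
# the "System role not supported" chat handler; the instance is patched in place,
# so BERTopic's isinstance checks still see a Llama object.
llm = patch_llama_create_chat_completion(llm)

representation_model = LlamaCPP(llm, prompt=PROMPT)

# clean_llm_output_text strips punctuation the model may wrap around a label, e.g.
# clean_llm_output_text("Topic label: **Meat & emissions!**") -> "Topic label Meat emissions"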
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
 pandas==2.3.3
-plotly==6.3.1
 scikit-learn==1.7.2
 umap-learn==0.5.9.post2
 gradio==5.49.1
@@ -23,4 +22,5 @@ llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-py
 # Specify exact llama_cpp wheel for huggingface compatibility
 # https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu121/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
 spaces==0.42.1
-numpy==2.2.6
+numpy==2.2.6
+plotly<=5.24.1 # Downgrade needed to enable correct topic document output display
requirements_aws.txt
CHANGED
@@ -1,5 +1,4 @@
 pandas==2.3.3
-plotly==6.3.1
 scikit-learn==1.7.2
 umap-learn==0.5.9.post2
 boto3==1.40.72
@@ -18,4 +17,5 @@ accelerate==1.11.0
 bertopic==0.17.3
 sentence-transformers==5.1.2
 spaces==0.42.1
-numpy==2.2.6
+numpy==2.2.6
+plotly<=5.24.1 # Downgrade needed to enable correct topic document output display
requirements_gpu.txt
CHANGED
@@ -1,5 +1,4 @@
 pandas==2.3.3
-plotly==6.3.1
 scikit-learn==1.7.2
 umap-learn==0.5.9.post2
 gradio==5.49.1
@@ -21,4 +20,5 @@ llama-cpp-python==0.3.4 --extra-index-url https://abetlen.github.io/llama-cpp-py
 sentence-transformers==5.1.2
 spaces==0.42.1
 numpy==2.2.6
+plotly<=5.24.1 # Downgrade needed to enable correct topic document output display