Spaces:

awacke1
/

AzureCosmosDBUI

Sleeping

App Files Files Community

awacke1 committed on Feb 22

Commit

e8f830c

verified ·

1 Parent(s): faff1a8

Update app.py

Browse files

Files changed (1) hide show

app.py +358 -232

app.py CHANGED Viewed

@@ -1,40 +1,36 @@
 # app.py
 # =============================================================================
-# 🚀 IMPORTS
 # =============================================================================
-import base64  # 🔥 For encoding/decoding files
-import glob  # 🔍 For file searching
-import hashlib  # 🔒 For hashing
-import json  # 🧮 For JSON handling
-import os  # 📁 For OS interactions
-import pandas as pd  # 🐼 For data frame support
-import pytz  # ⏰ For timezone management
-import random  # 🎲 For randomness
-import re  # 🔍 For regex operations
-import shutil  # 🗑️ For file copying/removal
-import streamlit as st  # 💻 For the Streamlit UI
-import time  # ⏳ For timing
-import traceback  # 🚨 For error traces
-import uuid  # 🆔 For unique ID generation
-import zipfile  # 📦 For archiving files
-from PIL import Image  # 🖼️ For image processing
-from azure.cosmos import CosmosClient, PartitionKey, exceptions  # ☁️ For Cosmos DB operations
-from datetime import datetime  # ⏰ For timestamps
-from git import Repo  # 🐙 For Git operations
-from github import Github  # 🔗 For GitHub API interactions
-from gradio_client import Client, handle_file  # 🤖 For Gradio video generation
-import tempfile  # 📝 For temporary file handling
-import io  # 📡 For in-memory streams
-import requests  # 🌐 For HTTP requests
-import numpy as np  # 🔢 For numerical operations
-from urllib.parse import quote  # 🔗 For URL encoding
-# Allow nested asyncio.run calls (needed for our async TTS and Arxiv search)
-import nest_asyncio
-nest_asyncio.apply()
 # =============================================================================
-# 😎 EXTERNAL HELP LINKS (Always visible in sidebar)
 # =============================================================================
 external_links = [
     {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"},
@@ -50,7 +46,7 @@ external_links = [
 ]
 # =============================================================================
-# 🎨 APP CONFIGURATION
 # =============================================================================
 Site_Name = '🐙 GitCosmos'
 title = "🐙 GitCosmos"
@@ -78,9 +74,8 @@ LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI"
 CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
 # =============================================================================
-# 💾 HELPER FUNCTIONS
 # =============================================================================
-# 🔗 Get a download link for a file
 def get_download_link(file_path):
     with open(file_path, "rb") as file:
         contents = file.read()
@@ -88,7 +83,6 @@ def get_download_link(file_path):
         file_name = os.path.basename(file_path)
         return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
-# 🆔 Generate a unique ID
 def generate_unique_id():
     timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
     unique_uuid = str(uuid.uuid4())
@@ -96,27 +90,23 @@ def generate_unique_id():
     st.write('New ID: ' + return_value)
     return return_value
-# 📝 Generate a safe filename based on a prompt
 def generate_filename(prompt, file_type):
     """Build a timestamped file name derived from a prompt.

     The result is the current US/Central time formatted as ``MMDD_HHMM``,
     immediately followed by the prompt with every run of non-word
     characters removed (truncated to 90 characters), then ``.`` and
     ``file_type``.
     """
     stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
     # Strip anything that is not a word character, then cap the length.
     slug = re.sub(r'\W+', '', prompt)[:90]
     return "{}{}.{}".format(stamp, slug, file_type)
-# 📄 Create a file with given content
 def create_file(filename, prompt, response, should_save=True):
     """Write a prompt and its response to ``filename`` as UTF-8 text.

     The file contains the prompt, one blank line, then the response.
     Nothing is written when ``should_save`` is falsy. Returns ``None``
     in both cases.
     """
     if should_save:
         with open(filename, mode='w', encoding='utf-8') as out:
             out.write(prompt + "\n\n" + response)
-# 📂 Load file contents
 def load_file(file_name):
     """Return the full text of ``file_name``, decoded as UTF-8."""
     with open(file_name, mode="r", encoding='utf-8') as handle:
         return handle.read()
-# 🔗 Display a glossary entity with quick search links
 def display_glossary_entity(k):
     search_urls = {
         "🚀": lambda k: f"/?q={k}",
@@ -127,7 +117,6 @@ def display_glossary_entity(k):
     links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
     st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
-# 📦 Create a ZIP archive of given files
 def create_zip_of_files(files):
     zip_name = "all_files.zip"
     with zipfile.ZipFile(zip_name, 'w') as zipf:
@@ -135,7 +124,6 @@ def create_zip_of_files(files):
             zipf.write(file)
     return zip_name
-# 🎥 Get HTML to embed a video
 def get_video_html(video_path, width="100%"):
     video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
     return f'''
@@ -145,7 +133,6 @@ def get_video_html(video_path, width="100%"):
     </video>
     '''
-# 🎵 Get HTML to embed audio
 def get_audio_html(audio_path, width="100%"):
     audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
     return f'''
@@ -155,7 +142,6 @@ def get_audio_html(audio_path, width="100%"):
     </audio>
     '''
-# ✂️ Preprocess text (e.g., for JSON safety)
 def preprocess_text(text):
     text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
     text = text.replace('"', '\\"')
@@ -164,7 +150,7 @@ def preprocess_text(text):
     return text.strip()
 # =============================================================================
-# ☁️ COSMOS DB FUNCTIONS
 # =============================================================================
 def get_databases(client):
     """Return the ``id`` of each entry yielded by ``client.list_databases()``."""
     database_ids = []
     for entry in client.list_databases():
         database_ids.append(entry['id'])
     return database_ids
@@ -269,7 +255,7 @@ def archive_current_container(database_name, container_name, client):
         return f"Archive error: {str(e)} 😢"
 # =============================================================================
-# 🚀 ADVANCED COSMOS FUNCTIONS
 # =============================================================================
 def create_new_container(database, container_id, partition_key_path,
                          analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
@@ -338,7 +324,7 @@ def vector_search(container, query_vector, vector_field, top=10, exact_search=Fa
     return results
 # =============================================================================
-# 🐙 GITHUB FUNCTIONS
 # =============================================================================
 def download_github_repo(url, local_path):
     if os.path.exists(local_path):
@@ -371,7 +357,7 @@ def push_to_github(local_path, repo, github_token):
     origin.push(refspec=f'{current_branch}:{current_branch}')
 # =============================================================================
-# 📁 FILE & MEDIA MANAGEMENT FUNCTIONS
 # =============================================================================
 def display_saved_files_in_sidebar():
     all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
@@ -413,7 +399,11 @@ def display_file_editor(file_path):
             return
     st.markdown("### ✏️ Edit File")
     st.markdown(f"**Editing:** {file_path}")
-    new_content = st.text_area("Edit JSON", value=st.session_state.file_content[file_path], height=400, key="doc_editor", on_change=lambda: auto_save_edit())
     col1, col2 = st.columns([1, 5])
     with col1:
         if st.button("💾 Save"):
@@ -495,34 +485,35 @@ def update_file_management_section():
             display_file_editor(st.session_state.current_file)
 # =============================================================================
-# ✨ SIDEBAR DATA GRID: Show all container records with formatted timestamp
 # =============================================================================
-def show_sidebar_data_grid(container):
-    try:
-        records = get_documents(container)
-        # Build list of dicts with desired columns; sort descending by _ts or timestamp field
-        data = []
-        for rec in records:
-            ts = rec.get("timestamp", "")
-            try:
-                dt = datetime.fromisoformat(ts)
-                formatted = dt.strftime("%I:%M %p %m/%d/%Y")
-            except Exception:
-                formatted = ts
-            data.append({
-                "ID": rec.get("id", ""),
-                "Name": rec.get("name", ""),
-                "Timestamp": formatted
-            })
-        df = pd.DataFrame(data)
-        # Already sorted by _ts descending from the query; display in sidebar
-        st.sidebar.markdown("### 📊 Data Grid")
-        st.sidebar.dataframe(df)
-    except Exception as e:
-        st.sidebar.error(f"Data grid error: {str(e)}")
 # =============================================================================
-# 🎥 VIDEO & AUDIO UI FUNCTIONS
 # =============================================================================
 def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
     try:
@@ -638,7 +629,58 @@ def add_video_generation_ui(container):
             st.error(f"Upload error: {str(e)}")
 # =============================================================================
-# 🤖 NEW ITEM & FIELD FUNCTIONS
 # =============================================================================
 def new_item_default(container):
     new_id = generate_unique_id()
@@ -681,7 +723,7 @@ def add_field_to_doc():
          st.error(f"Error adding field: {str(e)}")
 # =============================================================================
-# 🔍 VECTOR SEARCH INTERFACE (Simple keyword search)
 # =============================================================================
 def vector_keyword_search(keyword, container):
     try:
@@ -693,7 +735,7 @@ def vector_keyword_search(keyword, container):
         return []
 # =============================================================================
-# 🤖 NEW AI MODALITY RECORD TEMPLATES
 # =============================================================================
 def new_ai_record(container):
     new_id = generate_unique_id()
@@ -737,7 +779,7 @@ def new_links_record(container):
          return None
 # =============================================================================
-# 🤖 LANGCHAIN FUNCTIONS (Witty emoji comments)
 # =============================================================================
 def display_langchain_functions():
     functions = [
@@ -750,37 +792,102 @@ def display_langchain_functions():
         st.sidebar.write(f"{func['name']}: {func['comment']}")
 # =============================================================================
-# ─────────────────────────────────────────────────────────
-# NEW: SIDEBAR DATA GRID FUNCTION
 # =============================================================================
-def show_sidebar_data_grid():
-    if st.session_state.get("current_container"):
-        show_sidebar_data_grid.container = st.session_state.current_container
         try:
-            records = get_documents(show_sidebar_data_grid.container)
-            data = []
-            for rec in records:
-                ts = rec.get("timestamp", "")
-                try:
-                    dt = datetime.fromisoformat(ts)
-                    formatted = dt.strftime("%I:%M %p %m/%d/%Y")
-                except Exception:
-                    formatted = ts
-                data.append({
-                    "ID": rec.get("id", ""),
-                    "Name": rec.get("name", ""),
-                    "Timestamp": formatted
-                })
-            df = pd.DataFrame(data)
-            st.sidebar.markdown("### 📊 Data Grid")
-            st.sidebar.dataframe(df)
         except Exception as e:
-            st.sidebar.error(f"Data grid error: {str(e)}")
-    else:
-        st.sidebar.info("No container selected for data grid.")
 # =============================================================================
-# 🤖 RESEARCH / ARXIV FUNCTIONS (Copied from second app code)
 # =============================================================================
 def parse_arxiv_refs(ref_text: str):
     if not ref_text:
@@ -848,7 +955,7 @@ def generate_5min_feature_markdown(paper: dict) -> str:
     pdf_link = generate_pdf_link(url)
     title_wc = len(title.split())
     summary_wc = len(summary.split())
-    high_info_terms = [term for term in summary.split()[:5]]  # simplified for demo
     terms_str = ", ".join(high_info_terms)
     rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
     mermaid_code = "```mermaid\nflowchart TD\n"
@@ -881,107 +988,80 @@ def create_detailed_paper_md(papers: list) -> str:
     return "\n".join(md_parts)
 # =============================================================================
-# 🤖 ASYNC TTS FUNCTIONS (from second app code)
 # =============================================================================
-import asyncio
-import edge_tts
-from streamlit_marquee import streamlit_marquee
-from collections import Counter
-class PerformanceTimer:
-    def __init__(self, operation_name: str):
-        self.operation_name = operation_name
-        self.start_time = None
-    def __enter__(self):
-        self.start_time = time.time()
-        return self
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        pass
-async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
-    with PerformanceTimer("tts_generation") as timer:
-        text = text.replace("\n", " ").strip()
-        if not text:
-            return None, 0
-        cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
-        if cache_key in st.session_state.get('audio_cache', {}):
-            return st.session_state['audio_cache'][cache_key], 0
-        try:
-            rate_str = f"{rate:+d}%"
-            pitch_str = f"{pitch:+d}Hz"
-            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
-            await communicate.save(filename)
-            st.session_state.setdefault('audio_cache', {})[cache_key] = filename
-            return filename, time.time() - timer.start_time
-        except Exception as e:
-            st.error(f"Error generating audio: {str(e)}")
-            return None, 0
-def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
-    result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
-    if isinstance(result, tuple):
-        return result[0]
-    return result
-async def async_save_qa_with_audio(question: str, answer: str):
-    with PerformanceTimer("qa_save") as timer:
-        md_file = create_file(question, answer, "md")
-        audio_file = None
-        if st.session_state.get('enable_audio', True):
-            audio_text = f"{question}\n\nAnswer: {answer}"
-            audio_file, _ = await async_edge_tts_generate(audio_text, voice=st.session_state.get('tts_voice', "en-US-AriaNeural"), file_format=st.session_state.get('audio_format', "mp3"))
-        return md_file, audio_file, time.time() - timer.start_time, 0
-def save_qa_with_audio(question, answer, voice=None):
-    if not voice:
-        voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
-    md_file = create_file(question, answer, "md")
-    audio_text = f"{question}\n\nAnswer: {answer}"
-    audio_file = speak_with_edge_tts(audio_text, voice=voice, file_format=st.session_state.get('audio_format', "mp3"))
-    return md_file, audio_file
-def play_and_download_audio(file_path, file_type="mp3"):
-    if file_path and os.path.exists(file_path):
-        st.audio(file_path)
-        dl_link = get_download_link(file_path, file_type=file_type)
-        st.markdown(dl_link, unsafe_allow_html=True)
-def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
-    cache_key = f"dl_{file_path}"
-    if cache_key in st.session_state.get('download_link_cache', {}):
-        return st.session_state['download_link_cache'][cache_key]
-    try:
-        with open(file_path, "rb") as f:
-            b64 = base64.b64encode(f.read()).decode()
-        filename = os.path.basename(file_path)
-        if file_type == "mp3":
-            link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
-        elif file_type == "wav":
-            link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
-        elif file_type == "md":
-            link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
         else:
-            link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">Download {filename}</a>'
-        st.session_state.setdefault('download_link_cache', {})[cache_key] = link
-        return link
-    except Exception as e:
-        st.error(f"Error creating download link: {str(e)}")
-        return ""
 # =============================================================================
-# ─────────────────────────────────────────────────────────
-# MAIN FUNCTION
 # =============================================================================
 def main():
-    # Friendly portal link
     st.markdown(f"[🔗 Portal]({CosmosDBUrl})")
-    # Initialize some session state keys if not already present
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
     st.session_state.setdefault("current_container", None)
-    # Sidebar: New Item, Add Field, New AI Record, New Links Record, and Vector Search
     st.sidebar.markdown("## 🛠️ Item Management")
     if st.sidebar.button("New Item"):
         if st.session_state.get("current_container"):
@@ -1017,17 +1097,15 @@ def main():
                 st.sidebar.code(json.dumps(res, indent=2), language="json")
         else:
             st.warning("No container selected for search!")
-    # Show the sidebar data grid with records
     show_sidebar_data_grid()
-    # Display Langchain functions in sidebar
     display_langchain_functions()
-    # Navigator: Container selection and data grid
     try:
         if st.session_state.get("client") is None:
             st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
         st.sidebar.title("🐙 Navigator")
         databases = get_databases(st.session_state.client)
         selected_db = st.sidebar.selectbox("🗃️ DB", databases)
         if selected_db != st.session_state.get("selected_database"):
             st.session_state.selected_database = selected_db
             st.session_state.selected_container = None
@@ -1048,12 +1126,7 @@ def main():
                     submitted = st.form_submit_button("Create Container")
                     if submitted:
                         analytical_ttl = -1 if new_analytical else None
-                        new_container = create_new_container(
-                            database,
-                            new_container_id,
-                            new_partition_key,
-                            analytical_storage_ttl=analytical_ttl
-                        )
                         if new_container:
                             st.success(f"Container '{new_container_id}' created.")
                             default_id = generate_unique_id()
@@ -1157,7 +1230,6 @@ def main():
                                 st.write(log_entry)
                 elif selected_view == 'Run AI':
                     st.markdown("#### 🤖 Run AI")
-                    # NEW: Use a text area and a Send button (message button UI)
                     ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
                     if st.button("Send"):
                         st.session_state.last_query = ai_query
@@ -1258,20 +1330,14 @@ def main():
         st.session_state.selected_document_id = None
         st.session_state.current_index = 0
         st.rerun()
-    # Also display the sidebar data grid (records overview)
     show_sidebar_data_grid()
 # =============================================================================
-# Additional Blank Lines for Spacing (~1500 lines total)
-# =============================================================================
-#
-#
-#
-#
-#
-#
-#
 #
 #
 #
@@ -1396,18 +1462,78 @@ def main():
 #
 #
 #
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-#
-# End of app.py

 # app.py
 # =============================================================================
+# ───────────── IMPORTS ─────────────
 # =============================================================================
+import base64
+import glob
+import hashlib
+import json
+import os
+import pandas as pd
+import pytz
+import random
+import re
+import shutil
+import streamlit as st
+import time
+import traceback
+import uuid
+import zipfile
+from PIL import Image
+from azure.cosmos import CosmosClient, PartitionKey, exceptions
+from datetime import datetime
+from git import Repo
+from github import Github
+from gradio_client import Client, handle_file
+import tempfile
+import io
+import requests
+import numpy as np
+from urllib.parse import quote
 # =============================================================================
+# ───────────── EXTERNAL HELP LINKS (Always visible in sidebar) ─────────────
 # =============================================================================
 external_links = [
     {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"},
 ]
 # =============================================================================
+# ───────────── APP CONFIGURATION ─────────────
 # =============================================================================
 Site_Name = '🐙 GitCosmos'
 title = "🐙 GitCosmos"
 CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
 # =============================================================================
+# ───────────── HELPER FUNCTIONS ─────────────
 # =============================================================================
 def get_download_link(file_path):
     with open(file_path, "rb") as file:
         contents = file.read()
         file_name = os.path.basename(file_path)
         return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
 def generate_unique_id():
     timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
     unique_uuid = str(uuid.uuid4())
     st.write('New ID: ' + return_value)
     return return_value
 def generate_filename(prompt, file_type):
     """Build a timestamped file name derived from a prompt.

     The result is the current US/Central time formatted as ``MMDD_HHMM``,
     immediately followed by the prompt with every run of non-word
     characters removed (truncated to 90 characters), then ``.`` and
     ``file_type``.
     """
     stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
     # Strip anything that is not a word character, then cap the length.
     slug = re.sub(r'\W+', '', prompt)[:90]
     return "{}{}.{}".format(stamp, slug, file_type)
 def create_file(filename, prompt, response, should_save=True):
     """Write a prompt and its response to ``filename`` as UTF-8 text.

     The file contains the prompt, one blank line, then the response.
     Nothing is written when ``should_save`` is falsy. Returns ``None``
     in both cases.
     """
     if should_save:
         with open(filename, mode='w', encoding='utf-8') as out:
             out.write(prompt + "\n\n" + response)
 def load_file(file_name):
     """Return the full text of ``file_name``, decoded as UTF-8."""
     with open(file_name, mode="r", encoding='utf-8') as handle:
         return handle.read()
 def display_glossary_entity(k):
     search_urls = {
         "🚀": lambda k: f"/?q={k}",
     links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
     st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
 def create_zip_of_files(files):
     zip_name = "all_files.zip"
     with zipfile.ZipFile(zip_name, 'w') as zipf:
             zipf.write(file)
     return zip_name
 def get_video_html(video_path, width="100%"):
     video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
     return f'''
     </video>
     '''
 def get_audio_html(audio_path, width="100%"):
     audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
     return f'''
     </audio>
     '''
 def preprocess_text(text):
     text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
     text = text.replace('"', '\\"')
     return text.strip()
 # =============================================================================
+# ───────────── COSMOS DB FUNCTIONS ─────────────
 # =============================================================================
 def get_databases(client):
     """Return the ``id`` of each entry yielded by ``client.list_databases()``."""
     database_ids = []
     for entry in client.list_databases():
         database_ids.append(entry['id'])
     return database_ids
         return f"Archive error: {str(e)} 😢"
 # =============================================================================
+# ───────────── ADVANCED COSMOS FUNCTIONS ─────────────
 # =============================================================================
 def create_new_container(database, container_id, partition_key_path,
                          analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
     return results
 # =============================================================================
+# ───────────── GITHUB FUNCTIONS ─────────────
 # =============================================================================
 def download_github_repo(url, local_path):
     if os.path.exists(local_path):
     origin.push(refspec=f'{current_branch}:{current_branch}')
 # =============================================================================
+# ───────────── FILE & MEDIA MANAGEMENT FUNCTIONS ─────────────
 # =============================================================================
 def display_saved_files_in_sidebar():
     all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
             return
     st.markdown("### ✏️ Edit File")
     st.markdown(f"**Editing:** {file_path}")
+    md_tab, code_tab = st.tabs(["Markdown", "Code"])
+    with md_tab:
+        st.markdown(st.session_state.file_content[file_path])
+    with code_tab:
+        new_content = st.text_area("Edit:", value=st.session_state.file_content[file_path], height=400, key=f"editor_{hash(file_path)}", on_change=lambda: auto_save_edit())
     col1, col2 = st.columns([1, 5])
     with col1:
         if st.button("💾 Save"):
             display_file_editor(st.session_state.current_file)
 # =============================================================================
+# ───────────── SIDEBAR DATA GRID (Records with formatted timestamps) ─────────────
 # =============================================================================
+def show_sidebar_data_grid():
+    if st.session_state.get("current_container"):
+        try:
+            records = get_documents(st.session_state.current_container)
+            data = []
+            for rec in records:
+                ts = rec.get("timestamp", "")
+                try:
+                    dt = datetime.fromisoformat(ts)
+                    formatted = dt.strftime("%I:%M %p %m/%d/%Y")
+                except Exception:
+                    formatted = ts
+                data.append({
+                    "ID": rec.get("id", ""),
+                    "Name": rec.get("name", ""),
+                    "Timestamp": formatted
+                })
+            df = pd.DataFrame(data)
+            st.sidebar.markdown("### 📊 Data Grid")
+            st.sidebar.dataframe(df)
+        except Exception as e:
+            st.sidebar.error(f"Data grid error: {str(e)}")
+    else:
+        st.sidebar.info("No container selected for data grid.")
 # =============================================================================
+# ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
 # =============================================================================
 def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
     try:
             st.error(f"Upload error: {str(e)}")
 # =============================================================================
+# ───────────── AI SAMPLES SIDEBAR (Processed as a Python List) ─────────────
+# =============================================================================
+def display_ai_samples():
+    ai_samples = [
+        {
+            "name": "FullTextContains",
+            "description": "Query using FullTextContains",
+            "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")'
+        },
+        {
+            "name": "FullTextContainsAll",
+            "description": "Query using FullTextContainsAll",
+            "query": 'SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.text, "red", "bicycle")'
+        },
+        {
+            "name": "FullTextContainsAny",
+            "description": "Query using FullTextContainsAny",
+            "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "red") AND FullTextContainsAny(c.text, "bicycle", "skateboard")'
+        },
+        {
+            "name": "FullTextScore",
+            "description": "Query using FullTextScore (order by relevance)",
+            "query": 'SELECT TOP 10 * FROM c ORDER BY RANK FullTextScore(c.text, ["bicycle", "mountain"])'
+        },
+        {
+            "name": "Vector Search with Score",
+            "description": "Example vector search snippet",
+            "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5)\nfor result, score in results:\n    print(result.json(), score)'
+        },
+        {
+            "name": "Vector Search with Filtering",
+            "description": "Example vector search with a filter",
+            "query": 'pre_filter = {"conditions": [{"property": "metadata.page", "operator": "$eq", "value": 0}]}\nresults = vector_search.similarity_search_with_score(query="Your query", k=5, pre_filter=pre_filter)'
+        },
+        {
+            "name": "Hybrid Search",
+            "description": "Example hybrid search snippet",
+            "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5, query_type=CosmosDBQueryType.HYBRID)'
+        }
+    ]
+    st.sidebar.markdown("### 🤖 AI Samples")
+    st.sidebar.info("🚀 Get started with our AI samples! Time free access to get started today.")
+    sample_names = [sample["name"] for sample in ai_samples]
+    selected_sample_name = st.sidebar.selectbox("Select an AI Sample", sample_names)
+    selected_sample = next((s for s in ai_samples if s["name"] == selected_sample_name), None)
+    if selected_sample:
+        st.sidebar.markdown(f"**{selected_sample['name']}**: {selected_sample['description']}")
+        lang = "sql" if "FullText" in selected_sample["name"] else "python"
+        st.sidebar.code(selected_sample["query"], language=lang)
+# =============================================================================
+# ───────────── NEW ITEM & FIELD FUNCTIONS
 # =============================================================================
 def new_item_default(container):
     new_id = generate_unique_id()
          st.error(f"Error adding field: {str(e)}")
 # =============================================================================
+# ───────────── VECTOR SEARCH INTERFACE (Simple keyword search)
 # =============================================================================
 def vector_keyword_search(keyword, container):
     try:
         return []
 # =============================================================================
+# ───────────── NEW AI MODALITY RECORD TEMPLATES
 # =============================================================================
 def new_ai_record(container):
     new_id = generate_unique_id()
          return None
 # =============================================================================
+# ───────────── LANGCHAIN FUNCTIONS (Witty emoji comments)
 # =============================================================================
 def display_langchain_functions():
     functions = [
         st.sidebar.write(f"{func['name']}: {func['comment']}")
 # =============================================================================
+# ───────────── OPTIONAL: SIDEBAR DATA GRID (Records with formatted timestamps)
 # =============================================================================
+# (This feature is now integrated above via show_sidebar_data_grid().)
+# =============================================================================
+# ───────────── ASYNC TTS & ARXIV FUNCTIONS (Optional Features)
+# =============================================================================
+import asyncio
+import edge_tts
+from streamlit_marquee import streamlit_marquee
+from collections import Counter
+class PerformanceTimer:
+    """Context manager that records wall-clock timing for a named operation.
+
+    ``start_time`` is set on entry; ``end_time`` and ``elapsed`` (seconds) are
+    set on exit. Exceptions raised inside the ``with`` body are not suppressed.
+    """
+    def __init__(self, operation_name: str):
+        self.operation_name = operation_name
+        self.start_time = None
+        self.end_time = None
+        self.elapsed = None
+    def __enter__(self):
+        self.start_time = time.time()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Record the duration so callers do not have to recompute it by hand.
+        self.end_time = time.time()
+        self.elapsed = self.end_time - self.start_time
+        # A falsy return value lets any in-flight exception propagate.
+        return False
+async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
+    """Synthesize ``text`` to an audio file with edge-tts, cached per session.
+
+    Args:
+        text: Text to speak; newlines are flattened to spaces and the result is stripped.
+        voice: Voice name passed to ``edge_tts.Communicate``.
+        rate: Integer rate adjustment, formatted as a signed percentage (e.g. ``+0%``).
+        pitch: Integer pitch adjustment, formatted as signed hertz (e.g. ``+0Hz``).
+        file_format: Extension used for the generated filename.
+
+    Returns:
+        A ``(filename, elapsed_seconds)`` tuple. Empty text and failures yield
+        ``(None, 0)``; a cache hit yields ``(cached_filename, 0)``.
+    """
+    import hashlib  # local import keeps this block self-contained
+    with PerformanceTimer("tts_generation") as timer:
+        text = text.replace("\n", " ").strip()
+        if not text:
+            return None, 0
+        # Key on a digest of the FULL text: truncating to a prefix would make two
+        # long texts with the same opening share one cached audio file.
+        text_digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
+        cache_key = f"{text_digest}_{voice}_{rate}_{pitch}_{file_format}"
+        audio_cache = st.session_state.setdefault('audio_cache', {})
+        cached_file = audio_cache.get(cache_key)
+        # Only trust the cache while the file is still on disk; otherwise regenerate.
+        if cached_file and os.path.exists(cached_file):
+            return cached_file, 0
         try:
+            rate_str = f"{rate:+d}%"
+            pitch_str = f"{pitch:+d}Hz"
+            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            # NOTE(review): the extension comes from file_format; confirm edge-tts
+            # actually emits that container for non-mp3 values.
+            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
+            await communicate.save(filename)
+            audio_cache[cache_key] = filename
+            return filename, time.time() - timer.start_time
         except Exception as e:
+            st.error(f"Error generating audio: {str(e)}")
+            return None, 0
+def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
+    """Run ``async_edge_tts_generate`` synchronously and return only the audio filename."""
+    outcome = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
+    # A tuple result carries the filename first; anything else is passed through untouched.
+    return outcome[0] if isinstance(outcome, tuple) else outcome
+async def async_save_qa_with_audio(question: str, answer: str):
+    """Save a Q&A pair as markdown and, unless audio is disabled, as speech.
+
+    Returns:
+        ``(md_file, audio_file, elapsed_seconds, 0)``; ``audio_file`` is ``None``
+        when the ``enable_audio`` session flag is falsy.
+    """
+    with PerformanceTimer("qa_save") as timer:
+        md_file = create_file(question, answer, "md")
+        if not st.session_state.get('enable_audio', True):
+            audio_file = None
+        else:
+            spoken_text = f"{question}\n\nAnswer: {answer}"
+            # Voice and format come from the session, with the same defaults used elsewhere.
+            audio_file, _ = await async_edge_tts_generate(
+                spoken_text,
+                voice=st.session_state.get('tts_voice', "en-US-AriaNeural"),
+                file_format=st.session_state.get('audio_format', "mp3"),
+            )
+        return md_file, audio_file, time.time() - timer.start_time, 0
+def save_qa_with_audio(question, answer, voice=None):
+    """Write the Q&A as markdown, speak it, and return ``(md_file, audio_file)``.
+
+    When ``voice`` is falsy, the session's ``tts_voice`` (default
+    ``en-US-AriaNeural``) is used.
+    """
+    chosen_voice = voice or st.session_state.get('tts_voice', "en-US-AriaNeural")
+    md_file = create_file(question, answer, "md")
+    spoken_text = f"{question}\n\nAnswer: {answer}"
+    audio_file = speak_with_edge_tts(
+        spoken_text,
+        voice=chosen_voice,
+        file_format=st.session_state.get('audio_format', "mp3"),
+    )
+    return md_file, audio_file
+def play_and_download_audio(file_path, file_type="mp3"):
+    """Render an audio player plus a download link; do nothing if the file is missing."""
+    if not file_path or not os.path.exists(file_path):
+        return
+    st.audio(file_path)
+    st.markdown(get_download_link(file_path, file_type=file_type), unsafe_allow_html=True)
+def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
+    """Build (and cache per session) an HTML data-URI download link for a file.
+
+    Args:
+        file_path: Path of the file to embed as base64.
+        file_type: ``"mp3"``, ``"wav"`` or ``"md"`` select a specific MIME type and
+            icon; any other value falls back to ``application/octet-stream``.
+
+    Returns:
+        The ``<a>`` tag as a string, or ``""`` if the file could not be read
+        (an error is shown via ``st.error`` in that case).
+    """
+    import html  # local import keeps this block self-contained
+    # file_type -> (MIME type, label prefix)
+    link_styles = {
+        "mp3": ("audio/mpeg", "🎵 "),
+        "wav": ("audio/wav", "🔊 "),
+        "md": ("text/markdown", "📝 "),
+    }
+    # file_type is part of the key so the same path requested with another
+    # type does not return a link carrying the wrong MIME type.
+    cache_key = f"dl_{file_type}_{file_path}"
+    link_cache = st.session_state.setdefault('download_link_cache', {})
+    if cache_key in link_cache:
+        return link_cache[cache_key]
+    try:
+        with open(file_path, "rb") as f:
+            b64 = base64.b64encode(f.read()).decode()
+    except Exception as e:
+        st.error(f"Error creating download link: {str(e)}")
+        return ""
+    mime_type, icon = link_styles.get(file_type, ("application/octet-stream", ""))
+    # Escape the name: it is interpolated into both an attribute and the link text.
+    filename = html.escape(os.path.basename(file_path), quote=True)
+    link = f'<a href="data:{mime_type};base64,{b64}" download="{filename}">{icon}Download {filename}</a>'
+    link_cache[cache_key] = link
+    return link
 # =============================================================================
+# ───────────── RESEARCH / ARXIV FUNCTIONS (Optional Features)
 # =============================================================================
 def parse_arxiv_refs(ref_text: str):
     if not ref_text:
     pdf_link = generate_pdf_link(url)
     title_wc = len(title.split())
     summary_wc = len(summary.split())
+    high_info_terms = [term for term in summary.split()[:5]]
     terms_str = ", ".join(high_info_terms)
     rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
     mermaid_code = "```mermaid\nflowchart TD\n"
     return "\n".join(md_parts)
 # =============================================================================
+# ─────────────────────────────────────────────────────────
+# MAIN AI LOOKUP FUNCTION (Optional Features)
 # =============================================================================
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
+    """Ask Claude a question, voice the answer, and optionally run an ArXiv RAG pass.
+
+    Flow: (1) send ``q`` to Claude, display and voice the reply; (2) when
+    ``useArxiv`` is true, append the reply to ``q``, query the ArXiv RAG Gradio
+    space, display/voice the references, save paper summaries, and ask Claude a
+    follow-up question.
+
+    Args:
+        q: The user's query text.
+        vocal_summary, extended_refs, titles_summary, full_audio: Accepted for
+            interface compatibility; not read by this function.
+        useArxiv: Run the ArXiv RAG pass and the follow-up Claude call.
+        useArxivAudio: Additionally run a TTS generation call after papers are found.
+
+    Returns:
+        The Claude reply text, or the combined query-plus-references markdown
+        when ``useArxiv`` is true.
+    """
+    start = time.time()
+    # NOTE(review): this prompt is defined but never sent to the model; either
+    # pass it as a system prompt or remove it.
+    ai_constitution = """
+    You are a medical and machine learning review board expert...
+    """
+    # 1) Claude API call
+    import anthropic
+    claude_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY_3"))
+    claude_model = "claude-3-sonnet-20240229"
+    response = claude_client.messages.create(
+        model=claude_model,
+        max_tokens=1000,
+        messages=[{"role": "user", "content": q}]
+    )
+    result = response.content[0].text
+    st.write("Claude's reply 🧠:")
+    st.markdown(result)
+    # save_qa_with_audio() writes the markdown file itself, so no separate
+    # create_file() call is made for the main answer.
+    _, audio_file = save_qa_with_audio(q, result)
+    st.subheader("📝 Main Response Audio")
+    play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))
+    if useArxiv:
+        q = q + result
+        st.write('Running Arxiv RAG with Claude inputs.')
+        # The Gradio client gets its own name so the Anthropic client stays
+        # available for the follow-up messages.create() call below.
+        rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+        refs = rag_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
+        result = f"🔎 {q}\n\n{refs}"
+        _, audio_file = save_qa_with_audio(q, result)
+        st.subheader("📝 Main Response Audio")
+        play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))
+        papers = parse_arxiv_refs(refs)
+        if papers:
+            paper_links = create_paper_links_md(papers)
+            create_file(q, paper_links, "md")
+            st.markdown(paper_links)
+            detailed_md = create_detailed_paper_md(papers)
+            create_file(q, detailed_md, "md")
+            st.markdown(detailed_md)
+            if useArxivAudio:
+                asyncio.run(async_edge_tts_generate("Sample text", st.session_state.get('tts_voice', "en-US-AriaNeural")))
+            st.write("Displaying Papers:")
+            # (Optional: call functions to display papers)
         else:
+            st.warning("No papers found.")
+        # NOTE(review): the prompt refers to "reference papers below", but only
+        # q (question + first reply) is sent; confirm whether refs should be appended.
+        response2 = claude_client.messages.create(
+            model=claude_model,
+            max_tokens=1000,
+            messages=[{"role": "user", "content": q + '\n\nUse the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with working code.'}]
+        )
+        r2 = response2.content[0].text
+        st.write("Claude's reply 🧠:")
+        st.markdown(r2)
+    elapsed = time.time() - start
+    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
+    return result
 # =============================================================================
+# ───────────── MAIN FUNCTION ─────────────
 # =============================================================================
 def main():
+    st.markdown("### 🐙 GitCosmos - Cosmos & Git Hub")
     st.markdown(f"[🔗 Portal]({CosmosDBUrl})")
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
     st.session_state.setdefault("current_container", None)
+    if Key:
+        st.session_state.primary_key = Key
+        st.session_state.logged_in = True
+    else:
+        st.error("Missing Cosmos Key 🔑❌")
+        return
     st.sidebar.markdown("## 🛠️ Item Management")
     if st.sidebar.button("New Item"):
         if st.session_state.get("current_container"):
                 st.sidebar.code(json.dumps(res, indent=2), language="json")
         else:
             st.warning("No container selected for search!")
     show_sidebar_data_grid()
     display_langchain_functions()
     try:
         if st.session_state.get("client") is None:
             st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
         st.sidebar.title("🐙 Navigator")
         databases = get_databases(st.session_state.client)
         selected_db = st.sidebar.selectbox("🗃️ DB", databases)
+        st.markdown(CosmosDBUrl)
         if selected_db != st.session_state.get("selected_database"):
             st.session_state.selected_database = selected_db
             st.session_state.selected_container = None
                     submitted = st.form_submit_button("Create Container")
                     if submitted:
                         analytical_ttl = -1 if new_analytical else None
+                        new_container = create_new_container(database, new_container_id, new_partition_key, analytical_storage_ttl=analytical_ttl)
                         if new_container:
                             st.success(f"Container '{new_container_id}' created.")
                             default_id = generate_unique_id()
                                 st.write(log_entry)
                 elif selected_view == 'Run AI':
                     st.markdown("#### 🤖 Run AI")
                     ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
                     if st.button("Send"):
                         st.session_state.last_query = ai_query
         st.session_state.selected_document_id = None
         st.session_state.current_index = 0
         st.rerun()
     show_sidebar_data_grid()
+if __name__ == "__main__":
+    main()
 # =============================================================================
+# ───────────── Additional Blank Lines for Spacing (~1500 lines total) ─────────────
 #
 #
 #
 #
 #
 #
+# =============================================================================
+# ───────────── OPTIONAL FEATURES (New RunAI / Arxiv Search & Voice UI) ─────────────
+# The following block includes the optional ARXIV/RunAI functions (copied from the second app).
+# Uncomment and enable as desired.
+#
+# import streamlit as st
+# import anthropic
+# import openai
+# import base64
+# import cv2
+# import glob
+# import json
+# import math
+# import os
+# import pytz
+# import random
+# import re
+# import requests
+# # import textract
+# import time
+# import zipfile
+# import plotly.graph_objects as go
+# import streamlit.components.v1 as components
+# from datetime import datetime
+# from audio_recorder_streamlit import audio_recorder
+# from bs4 import BeautifulSoup
+# from collections import defaultdict, deque, Counter
+# from dotenv import load_dotenv
+# from gradio_client import Client
+# from huggingface_hub import InferenceClient
+# from io import BytesIO
+# from PIL import Image
+# from PyPDF2 import PdfReader
+# from urllib.parse import quote
+# from xml.etree import ElementTree as ET
+# from openai import OpenAI
+# import extra_streamlit_components as stx
+# from streamlit.runtime.scriptrunner import get_script_run_ctx
+# import asyncio
+# import edge_tts
+# from streamlit_marquee import streamlit_marquee
+# from typing import Tuple, Optional
+# import pandas as pd
+#
+# import nest_asyncio
+# nest_asyncio.apply()
+#
+# st.set_page_config(
+#     page_title="🚲TalkingAIResearcher🏆",
+#     page_icon="🚲🏆",
+#     layout="wide",
+#     initial_sidebar_state="auto",
+#     menu_items={
+#         'Get Help': 'https://huggingface.co/awacke1',
+#         'Report a bug': 'https://huggingface.co/spaces/awacke1',
+#         'About': "🚲TalkingAIResearcher🏆"
+#     }
+# )
+# load_dotenv()
+#
+# EDGE_TTS_VOICES = [
+#     "en-US-AriaNeural",
+#     "en-US-GuyNeural",
+#     "en-US-JennyNeural",
+#     "en-GB-SoniaNeural",
+#     "en-GB-RyanNeural",
+#     "en-AU-NatashaNeural",
+#     "en-AU-WilliamNeural",
+#     "en-CA-ClaraNeural",
+#     "en-CA-LiamNeural"
+# ]
+#
+# # (Plus additional setup and functions as shown in the snippet above.)
+#
+# End of optional features block.