Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,40 +1,36 @@
|
|
| 1 |
# app.py
|
| 2 |
# =============================================================================
|
| 3 |
-
#
|
| 4 |
# =============================================================================
|
| 5 |
-
import base64
|
| 6 |
-
import glob
|
| 7 |
-
import hashlib
|
| 8 |
-
import json
|
| 9 |
-
import os
|
| 10 |
-
import pandas as pd
|
| 11 |
-
import pytz
|
| 12 |
-
import random
|
| 13 |
-
import re
|
| 14 |
-
import shutil
|
| 15 |
-
import streamlit as st
|
| 16 |
-
import time
|
| 17 |
-
import traceback
|
| 18 |
-
import uuid
|
| 19 |
-
import zipfile
|
| 20 |
-
from PIL import Image
|
| 21 |
-
from azure.cosmos import CosmosClient, PartitionKey, exceptions
|
| 22 |
-
from datetime import datetime
|
| 23 |
-
from git import Repo
|
| 24 |
-
from github import Github
|
| 25 |
-
from gradio_client import Client, handle_file
|
| 26 |
-
import tempfile
|
| 27 |
-
import io
|
| 28 |
-
import requests
|
| 29 |
-
import numpy as np
|
| 30 |
-
from urllib.parse import quote
|
| 31 |
-
|
| 32 |
-
# Allow nested asyncio.run calls (needed for our async TTS and Arxiv search)
|
| 33 |
-
import nest_asyncio
|
| 34 |
-
nest_asyncio.apply()
|
| 35 |
|
| 36 |
# =============================================================================
|
| 37 |
-
#
|
| 38 |
# =============================================================================
|
| 39 |
external_links = [
|
| 40 |
{"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "๐ป"},
|
|
@@ -50,7 +46,7 @@ external_links = [
|
|
| 50 |
]
|
| 51 |
|
| 52 |
# =============================================================================
|
| 53 |
-
#
|
| 54 |
# =============================================================================
|
| 55 |
Site_Name = '๐ GitCosmos'
|
| 56 |
title = "๐ GitCosmos"
|
|
@@ -78,9 +74,8 @@ LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI"
|
|
| 78 |
CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
|
| 79 |
|
| 80 |
# =============================================================================
|
| 81 |
-
#
|
| 82 |
# =============================================================================
|
| 83 |
-
# ๐ Get a download link for a file
|
| 84 |
def get_download_link(file_path):
|
| 85 |
with open(file_path, "rb") as file:
|
| 86 |
contents = file.read()
|
|
@@ -88,7 +83,6 @@ def get_download_link(file_path):
|
|
| 88 |
file_name = os.path.basename(file_path)
|
| 89 |
return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} ๐</a>'
|
| 90 |
|
| 91 |
-
# ๐ Generate a unique ID
|
| 92 |
def generate_unique_id():
|
| 93 |
timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
|
| 94 |
unique_uuid = str(uuid.uuid4())
|
|
@@ -96,27 +90,23 @@ def generate_unique_id():
|
|
| 96 |
st.write('New ID: ' + return_value)
|
| 97 |
return return_value
|
| 98 |
|
| 99 |
-
# ๐ Generate a safe filename based on a prompt
|
| 100 |
def generate_filename(prompt, file_type):
|
| 101 |
central = pytz.timezone('US/Central')
|
| 102 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
| 103 |
safe_prompt = re.sub(r'\W+', '', prompt)[:90]
|
| 104 |
return f"{safe_date_time}{safe_prompt}.{file_type}"
|
| 105 |
|
| 106 |
-
# ๐ Create a file with given content
|
| 107 |
def create_file(filename, prompt, response, should_save=True):
|
| 108 |
if not should_save:
|
| 109 |
return
|
| 110 |
with open(filename, 'w', encoding='utf-8') as file:
|
| 111 |
file.write(prompt + "\n\n" + response)
|
| 112 |
|
| 113 |
-
# ๐ Load file contents
|
| 114 |
def load_file(file_name):
|
| 115 |
with open(file_name, "r", encoding='utf-8') as file:
|
| 116 |
content = file.read()
|
| 117 |
return content
|
| 118 |
|
| 119 |
-
# ๐ Display a glossary entity with quick search links
|
| 120 |
def display_glossary_entity(k):
|
| 121 |
search_urls = {
|
| 122 |
"๐": lambda k: f"/?q={k}",
|
|
@@ -127,7 +117,6 @@ def display_glossary_entity(k):
|
|
| 127 |
links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
|
| 128 |
st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
|
| 129 |
|
| 130 |
-
# ๐ฆ Create a ZIP archive of given files
|
| 131 |
def create_zip_of_files(files):
|
| 132 |
zip_name = "all_files.zip"
|
| 133 |
with zipfile.ZipFile(zip_name, 'w') as zipf:
|
|
@@ -135,7 +124,6 @@ def create_zip_of_files(files):
|
|
| 135 |
zipf.write(file)
|
| 136 |
return zip_name
|
| 137 |
|
| 138 |
-
# ๐ฅ Get HTML to embed a video
|
| 139 |
def get_video_html(video_path, width="100%"):
|
| 140 |
video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
|
| 141 |
return f'''
|
|
@@ -145,7 +133,6 @@ def get_video_html(video_path, width="100%"):
|
|
| 145 |
</video>
|
| 146 |
'''
|
| 147 |
|
| 148 |
-
# ๐ต Get HTML to embed audio
|
| 149 |
def get_audio_html(audio_path, width="100%"):
|
| 150 |
audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
|
| 151 |
return f'''
|
|
@@ -155,7 +142,6 @@ def get_audio_html(audio_path, width="100%"):
|
|
| 155 |
</audio>
|
| 156 |
'''
|
| 157 |
|
| 158 |
-
# โ๏ธ Preprocess text (e.g., for JSON safety)
|
| 159 |
def preprocess_text(text):
|
| 160 |
text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
|
| 161 |
text = text.replace('"', '\\"')
|
|
@@ -164,7 +150,7 @@ def preprocess_text(text):
|
|
| 164 |
return text.strip()
|
| 165 |
|
| 166 |
# =============================================================================
|
| 167 |
-
#
|
| 168 |
# =============================================================================
|
| 169 |
def get_databases(client):
|
| 170 |
return [db['id'] for db in client.list_databases()]
|
|
@@ -269,7 +255,7 @@ def archive_current_container(database_name, container_name, client):
|
|
| 269 |
return f"Archive error: {str(e)} ๐ข"
|
| 270 |
|
| 271 |
# =============================================================================
|
| 272 |
-
#
|
| 273 |
# =============================================================================
|
| 274 |
def create_new_container(database, container_id, partition_key_path,
|
| 275 |
analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
|
|
@@ -338,7 +324,7 @@ def vector_search(container, query_vector, vector_field, top=10, exact_search=Fa
|
|
| 338 |
return results
|
| 339 |
|
| 340 |
# =============================================================================
|
| 341 |
-
#
|
| 342 |
# =============================================================================
|
| 343 |
def download_github_repo(url, local_path):
|
| 344 |
if os.path.exists(local_path):
|
|
@@ -371,7 +357,7 @@ def push_to_github(local_path, repo, github_token):
|
|
| 371 |
origin.push(refspec=f'{current_branch}:{current_branch}')
|
| 372 |
|
| 373 |
# =============================================================================
|
| 374 |
-
#
|
| 375 |
# =============================================================================
|
| 376 |
def display_saved_files_in_sidebar():
|
| 377 |
all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
|
|
@@ -413,7 +399,11 @@ def display_file_editor(file_path):
|
|
| 413 |
return
|
| 414 |
st.markdown("### โ๏ธ Edit File")
|
| 415 |
st.markdown(f"**Editing:** {file_path}")
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
col1, col2 = st.columns([1, 5])
|
| 418 |
with col1:
|
| 419 |
if st.button("๐พ Save"):
|
|
@@ -495,34 +485,35 @@ def update_file_management_section():
|
|
| 495 |
display_file_editor(st.session_state.current_file)
|
| 496 |
|
| 497 |
# =============================================================================
|
| 498 |
-
#
|
| 499 |
# =============================================================================
|
| 500 |
-
def show_sidebar_data_grid(
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
|
|
|
| 523 |
|
| 524 |
# =============================================================================
|
| 525 |
-
#
|
| 526 |
# =============================================================================
|
| 527 |
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
|
| 528 |
try:
|
|
@@ -638,7 +629,58 @@ def add_video_generation_ui(container):
|
|
| 638 |
st.error(f"Upload error: {str(e)}")
|
| 639 |
|
| 640 |
# =============================================================================
|
| 641 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 642 |
# =============================================================================
|
| 643 |
def new_item_default(container):
|
| 644 |
new_id = generate_unique_id()
|
|
@@ -681,7 +723,7 @@ def add_field_to_doc():
|
|
| 681 |
st.error(f"Error adding field: {str(e)}")
|
| 682 |
|
| 683 |
# =============================================================================
|
| 684 |
-
#
|
| 685 |
# =============================================================================
|
| 686 |
def vector_keyword_search(keyword, container):
|
| 687 |
try:
|
|
@@ -693,7 +735,7 @@ def vector_keyword_search(keyword, container):
|
|
| 693 |
return []
|
| 694 |
|
| 695 |
# =============================================================================
|
| 696 |
-
#
|
| 697 |
# =============================================================================
|
| 698 |
def new_ai_record(container):
|
| 699 |
new_id = generate_unique_id()
|
|
@@ -737,7 +779,7 @@ def new_links_record(container):
|
|
| 737 |
return None
|
| 738 |
|
| 739 |
# =============================================================================
|
| 740 |
-
#
|
| 741 |
# =============================================================================
|
| 742 |
def display_langchain_functions():
|
| 743 |
functions = [
|
|
@@ -750,37 +792,102 @@ def display_langchain_functions():
|
|
| 750 |
st.sidebar.write(f"{func['name']}: {func['comment']}")
|
| 751 |
|
| 752 |
# =============================================================================
|
| 753 |
-
#
|
| 754 |
-
# NEW: SIDEBAR DATA GRID FUNCTION
|
| 755 |
# =============================================================================
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
try:
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
formatted = ts
|
| 769 |
-
data.append({
|
| 770 |
-
"ID": rec.get("id", ""),
|
| 771 |
-
"Name": rec.get("name", ""),
|
| 772 |
-
"Timestamp": formatted
|
| 773 |
-
})
|
| 774 |
-
df = pd.DataFrame(data)
|
| 775 |
-
st.sidebar.markdown("### ๐ Data Grid")
|
| 776 |
-
st.sidebar.dataframe(df)
|
| 777 |
except Exception as e:
|
| 778 |
-
st.
|
| 779 |
-
|
| 780 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 781 |
|
| 782 |
# =============================================================================
|
| 783 |
-
#
|
| 784 |
# =============================================================================
|
| 785 |
def parse_arxiv_refs(ref_text: str):
|
| 786 |
if not ref_text:
|
|
@@ -848,7 +955,7 @@ def generate_5min_feature_markdown(paper: dict) -> str:
|
|
| 848 |
pdf_link = generate_pdf_link(url)
|
| 849 |
title_wc = len(title.split())
|
| 850 |
summary_wc = len(summary.split())
|
| 851 |
-
high_info_terms = [term for term in summary.split()[:5]]
|
| 852 |
terms_str = ", ".join(high_info_terms)
|
| 853 |
rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
|
| 854 |
mermaid_code = "```mermaid\nflowchart TD\n"
|
|
@@ -881,107 +988,80 @@ def create_detailed_paper_md(papers: list) -> str:
|
|
| 881 |
return "\n".join(md_parts)
|
| 882 |
|
| 883 |
# =============================================================================
|
| 884 |
-
#
|
|
|
|
| 885 |
# =============================================================================
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
async def async_save_qa_with_audio(question: str, answer: str):
|
| 929 |
-
with PerformanceTimer("qa_save") as timer:
|
| 930 |
-
md_file = create_file(question, answer, "md")
|
| 931 |
-
audio_file = None
|
| 932 |
-
if st.session_state.get('enable_audio', True):
|
| 933 |
-
audio_text = f"{question}\n\nAnswer: {answer}"
|
| 934 |
-
audio_file, _ = await async_edge_tts_generate(audio_text, voice=st.session_state.get('tts_voice', "en-US-AriaNeural"), file_format=st.session_state.get('audio_format', "mp3"))
|
| 935 |
-
return md_file, audio_file, time.time() - timer.start_time, 0
|
| 936 |
-
|
| 937 |
-
def save_qa_with_audio(question, answer, voice=None):
|
| 938 |
-
if not voice:
|
| 939 |
-
voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
|
| 940 |
-
md_file = create_file(question, answer, "md")
|
| 941 |
-
audio_text = f"{question}\n\nAnswer: {answer}"
|
| 942 |
-
audio_file = speak_with_edge_tts(audio_text, voice=voice, file_format=st.session_state.get('audio_format', "mp3"))
|
| 943 |
-
return md_file, audio_file
|
| 944 |
-
|
| 945 |
-
def play_and_download_audio(file_path, file_type="mp3"):
|
| 946 |
-
if file_path and os.path.exists(file_path):
|
| 947 |
-
st.audio(file_path)
|
| 948 |
-
dl_link = get_download_link(file_path, file_type=file_type)
|
| 949 |
-
st.markdown(dl_link, unsafe_allow_html=True)
|
| 950 |
-
|
| 951 |
-
def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
|
| 952 |
-
cache_key = f"dl_{file_path}"
|
| 953 |
-
if cache_key in st.session_state.get('download_link_cache', {}):
|
| 954 |
-
return st.session_state['download_link_cache'][cache_key]
|
| 955 |
-
try:
|
| 956 |
-
with open(file_path, "rb") as f:
|
| 957 |
-
b64 = base64.b64encode(f.read()).decode()
|
| 958 |
-
filename = os.path.basename(file_path)
|
| 959 |
-
if file_type == "mp3":
|
| 960 |
-
link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">๐ต Download {filename}</a>'
|
| 961 |
-
elif file_type == "wav":
|
| 962 |
-
link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">๐ Download {filename}</a>'
|
| 963 |
-
elif file_type == "md":
|
| 964 |
-
link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">๐ Download {filename}</a>'
|
| 965 |
else:
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 972 |
|
| 973 |
# =============================================================================
|
| 974 |
-
#
|
| 975 |
-
# MAIN FUNCTION
|
| 976 |
# =============================================================================
|
| 977 |
def main():
|
| 978 |
-
|
| 979 |
st.markdown(f"[๐ Portal]({CosmosDBUrl})")
|
| 980 |
-
# Initialize some session state keys if not already present
|
| 981 |
if "chat_history" not in st.session_state:
|
| 982 |
st.session_state.chat_history = []
|
| 983 |
st.session_state.setdefault("current_container", None)
|
| 984 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
st.sidebar.markdown("## ๐ ๏ธ Item Management")
|
| 986 |
if st.sidebar.button("New Item"):
|
| 987 |
if st.session_state.get("current_container"):
|
|
@@ -1017,17 +1097,15 @@ def main():
|
|
| 1017 |
st.sidebar.code(json.dumps(res, indent=2), language="json")
|
| 1018 |
else:
|
| 1019 |
st.warning("No container selected for search!")
|
| 1020 |
-
# Show the sidebar data grid with records
|
| 1021 |
show_sidebar_data_grid()
|
| 1022 |
-
# Display Langchain functions in sidebar
|
| 1023 |
display_langchain_functions()
|
| 1024 |
-
# Navigator: Container selection and data grid
|
| 1025 |
try:
|
| 1026 |
if st.session_state.get("client") is None:
|
| 1027 |
st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
|
| 1028 |
st.sidebar.title("๐ Navigator")
|
| 1029 |
databases = get_databases(st.session_state.client)
|
| 1030 |
selected_db = st.sidebar.selectbox("๐๏ธ DB", databases)
|
|
|
|
| 1031 |
if selected_db != st.session_state.get("selected_database"):
|
| 1032 |
st.session_state.selected_database = selected_db
|
| 1033 |
st.session_state.selected_container = None
|
|
@@ -1048,12 +1126,7 @@ def main():
|
|
| 1048 |
submitted = st.form_submit_button("Create Container")
|
| 1049 |
if submitted:
|
| 1050 |
analytical_ttl = -1 if new_analytical else None
|
| 1051 |
-
new_container = create_new_container(
|
| 1052 |
-
database,
|
| 1053 |
-
new_container_id,
|
| 1054 |
-
new_partition_key,
|
| 1055 |
-
analytical_storage_ttl=analytical_ttl
|
| 1056 |
-
)
|
| 1057 |
if new_container:
|
| 1058 |
st.success(f"Container '{new_container_id}' created.")
|
| 1059 |
default_id = generate_unique_id()
|
|
@@ -1157,7 +1230,6 @@ def main():
|
|
| 1157 |
st.write(log_entry)
|
| 1158 |
elif selected_view == 'Run AI':
|
| 1159 |
st.markdown("#### ๐ค Run AI")
|
| 1160 |
-
# NEW: Use a text area and a Send button (message button UI)
|
| 1161 |
ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
|
| 1162 |
if st.button("Send"):
|
| 1163 |
st.session_state.last_query = ai_query
|
|
@@ -1258,20 +1330,14 @@ def main():
|
|
| 1258 |
st.session_state.selected_document_id = None
|
| 1259 |
st.session_state.current_index = 0
|
| 1260 |
st.rerun()
|
| 1261 |
-
|
| 1262 |
-
# Also display the sidebar data grid (records overview)
|
| 1263 |
show_sidebar_data_grid()
|
| 1264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1265 |
# =============================================================================
|
| 1266 |
-
# Additional Blank Lines for Spacing (~1500 lines total)
|
| 1267 |
-
# =============================================================================
|
| 1268 |
-
#
|
| 1269 |
-
#
|
| 1270 |
-
#
|
| 1271 |
-
#
|
| 1272 |
-
#
|
| 1273 |
-
#
|
| 1274 |
-
#
|
| 1275 |
#
|
| 1276 |
#
|
| 1277 |
#
|
|
@@ -1396,18 +1462,78 @@ def main():
|
|
| 1396 |
#
|
| 1397 |
#
|
| 1398 |
#
|
| 1399 |
-
#
|
| 1400 |
-
#
|
| 1401 |
-
#
|
| 1402 |
-
#
|
| 1403 |
-
#
|
| 1404 |
-
#
|
| 1405 |
-
#
|
| 1406 |
-
#
|
| 1407 |
-
#
|
| 1408 |
-
#
|
| 1409 |
-
#
|
| 1410 |
-
#
|
| 1411 |
-
#
|
| 1412 |
-
#
|
| 1413 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# app.py
|
| 2 |
# =============================================================================
|
| 3 |
+
# โโโโโโโโโโโโโ IMPORTS โโโโโโโโโโโโโ
|
| 4 |
# =============================================================================
|
| 5 |
+
import base64
|
| 6 |
+
import glob
|
| 7 |
+
import hashlib
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import pytz
|
| 12 |
+
import random
|
| 13 |
+
import re
|
| 14 |
+
import shutil
|
| 15 |
+
import streamlit as st
|
| 16 |
+
import time
|
| 17 |
+
import traceback
|
| 18 |
+
import uuid
|
| 19 |
+
import zipfile
|
| 20 |
+
from PIL import Image
|
| 21 |
+
from azure.cosmos import CosmosClient, PartitionKey, exceptions
|
| 22 |
+
from datetime import datetime
|
| 23 |
+
from git import Repo
|
| 24 |
+
from github import Github
|
| 25 |
+
from gradio_client import Client, handle_file
|
| 26 |
+
import tempfile
|
| 27 |
+
import io
|
| 28 |
+
import requests
|
| 29 |
+
import numpy as np
|
| 30 |
+
from urllib.parse import quote
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# =============================================================================
|
| 33 |
+
# โโโโโโโโโโโโโ EXTERNAL HELP LINKS (Always visible in sidebar) โโโโโโโโโโโโโ
|
| 34 |
# =============================================================================
|
| 35 |
external_links = [
|
| 36 |
{"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "๐ป"},
|
|
|
|
| 46 |
]
|
| 47 |
|
| 48 |
# =============================================================================
|
| 49 |
+
# โโโโโโโโโโโโโ APP CONFIGURATION โโโโโโโโโโโโโ
|
| 50 |
# =============================================================================
|
| 51 |
Site_Name = '๐ GitCosmos'
|
| 52 |
title = "๐ GitCosmos"
|
|
|
|
| 74 |
CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
|
| 75 |
|
| 76 |
# =============================================================================
|
| 77 |
+
# โโโโโโโโโโโโโ HELPER FUNCTIONS โโโโโโโโโโโโโ
|
| 78 |
# =============================================================================
|
|
|
|
| 79 |
def get_download_link(file_path):
|
| 80 |
with open(file_path, "rb") as file:
|
| 81 |
contents = file.read()
|
|
|
|
| 83 |
file_name = os.path.basename(file_path)
|
| 84 |
return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} ๐</a>'
|
| 85 |
|
|
|
|
| 86 |
def generate_unique_id():
|
| 87 |
timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
|
| 88 |
unique_uuid = str(uuid.uuid4())
|
|
|
|
| 90 |
st.write('New ID: ' + return_value)
|
| 91 |
return return_value
|
| 92 |
|
|
|
|
| 93 |
def generate_filename(prompt, file_type):
    """Build a filename from the current US/Central time and a sanitized prompt.

    Args:
        prompt: Free text. Every run of non-word characters (regex ``\\W+``)
            is removed and the remainder is truncated to 90 characters.
        file_type: Extension appended after a dot.

    Returns:
        A string of the form ``"<MMDD_HHMM><sanitized prompt>.<file_type>"``.
        There is no separator between the timestamp and the prompt part.
    """
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    # Keep only word characters, then cap the length at 90.
    safe_prompt = re.sub(r'\W+', '', prompt)[:90]
    return f"{safe_date_time}{safe_prompt}.{file_type}"
|
| 98 |
|
|
|
|
| 99 |
def create_file(filename, prompt, response, should_save=True):
    """Write ``prompt`` and ``response`` to ``filename`` as UTF-8 text.

    The file content is the prompt, a blank line, then the response.

    Args:
        filename: Path of the file to create or overwrite.
        prompt: Text written first.
        response: Text written after a blank line.
        should_save: When false, nothing is written.

    Returns:
        None in every case.
    """
    if not should_save:
        return
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(prompt + "\n\n" + response)
|
| 104 |
|
|
|
|
| 105 |
def load_file(file_name):
    """Return the full contents of ``file_name`` decoded as UTF-8 text.

    Args:
        file_name: Path of the file to read.

    Returns:
        The file's text content as a single string.
    """
    with open(file_name, "r", encoding='utf-8') as file:
        return file.read()
|
| 109 |
|
|
|
|
| 110 |
def display_glossary_entity(k):
|
| 111 |
search_urls = {
|
| 112 |
"๐": lambda k: f"/?q={k}",
|
|
|
|
| 117 |
links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
|
| 118 |
st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
|
| 119 |
|
|
|
|
| 120 |
def create_zip_of_files(files):
|
| 121 |
zip_name = "all_files.zip"
|
| 122 |
with zipfile.ZipFile(zip_name, 'w') as zipf:
|
|
|
|
| 124 |
zipf.write(file)
|
| 125 |
return zip_name
|
| 126 |
|
|
|
|
| 127 |
def get_video_html(video_path, width="100%"):
|
| 128 |
video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
|
| 129 |
return f'''
|
|
|
|
| 133 |
</video>
|
| 134 |
'''
|
| 135 |
|
|
|
|
| 136 |
def get_audio_html(audio_path, width="100%"):
|
| 137 |
audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
|
| 138 |
return f'''
|
|
|
|
| 142 |
</audio>
|
| 143 |
'''
|
| 144 |
|
|
|
|
| 145 |
def preprocess_text(text):
|
| 146 |
text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
|
| 147 |
text = text.replace('"', '\\"')
|
|
|
|
| 150 |
return text.strip()
|
| 151 |
|
| 152 |
# =============================================================================
|
| 153 |
+
# โโโโโโโโโโโโโ COSMOS DB FUNCTIONS โโโโโโโโโโโโโ
|
| 154 |
# =============================================================================
|
| 155 |
def get_databases(client):
    """Return the ``'id'`` value of every entry from ``client.list_databases()``.

    Args:
        client: Object exposing ``list_databases()``, which yields mappings
            that each contain an ``'id'`` key.

    Returns:
        A list of the ``'id'`` values, in iteration order.
    """
    return [db['id'] for db in client.list_databases()]
|
|
|
|
| 255 |
return f"Archive error: {str(e)} ๐ข"
|
| 256 |
|
| 257 |
# =============================================================================
|
| 258 |
+
# โโโโโโโโโโโโโ ADVANCED COSMOS FUNCTIONS โโโโโโโโโโโโโ
|
| 259 |
# =============================================================================
|
| 260 |
def create_new_container(database, container_id, partition_key_path,
|
| 261 |
analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
|
|
|
|
| 324 |
return results
|
| 325 |
|
| 326 |
# =============================================================================
|
| 327 |
+
# โโโโโโโโโโโโโ GITHUB FUNCTIONS โโโโโโโโโโโโโ
|
| 328 |
# =============================================================================
|
| 329 |
def download_github_repo(url, local_path):
|
| 330 |
if os.path.exists(local_path):
|
|
|
|
| 357 |
origin.push(refspec=f'{current_branch}:{current_branch}')
|
| 358 |
|
| 359 |
# =============================================================================
|
| 360 |
+
# โโโโโโโโโโโโโ FILE & MEDIA MANAGEMENT FUNCTIONS โโโโโโโโโโโโโ
|
| 361 |
# =============================================================================
|
| 362 |
def display_saved_files_in_sidebar():
|
| 363 |
all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
|
|
|
|
| 399 |
return
|
| 400 |
st.markdown("### โ๏ธ Edit File")
|
| 401 |
st.markdown(f"**Editing:** {file_path}")
|
| 402 |
+
md_tab, code_tab = st.tabs(["Markdown", "Code"])
|
| 403 |
+
with md_tab:
|
| 404 |
+
st.markdown(st.session_state.file_content[file_path])
|
| 405 |
+
with code_tab:
|
| 406 |
+
new_content = st.text_area("Edit:", value=st.session_state.file_content[file_path], height=400, key=f"editor_{hash(file_path)}", on_change=lambda: auto_save_edit())
|
| 407 |
col1, col2 = st.columns([1, 5])
|
| 408 |
with col1:
|
| 409 |
if st.button("๐พ Save"):
|
|
|
|
| 485 |
display_file_editor(st.session_state.current_file)
|
| 486 |
|
| 487 |
# =============================================================================
|
| 488 |
+
# โโโโโโโโโโโโโ SIDEBAR DATA GRID (Records with formatted timestamps) โโโโโโโโโโโโโ
|
| 489 |
# =============================================================================
|
| 490 |
+
def show_sidebar_data_grid():
    """Render a table of the current container's records in the sidebar.

    Reads ``st.session_state["current_container"]``. When it is unset, an
    info message is shown instead. Each record contributes its ``id``,
    ``name`` and ``timestamp`` fields (empty string when missing). A
    timestamp that parses as ISO format is shown as ``HH:MM AM/PM MM/DD/YYYY``;
    anything else is shown unchanged. Any failure while fetching or rendering
    is reported in the sidebar rather than raised.
    """
    if st.session_state.get("current_container"):
        try:
            records = get_documents(st.session_state.current_container)
            data = []
            for rec in records:
                ts = rec.get("timestamp", "")
                try:
                    dt = datetime.fromisoformat(ts)
                    formatted = dt.strftime("%I:%M %p %m/%d/%Y")
                except (TypeError, ValueError):
                    # Non-string or non-ISO timestamps are displayed as stored.
                    formatted = ts
                data.append({
                    "ID": rec.get("id", ""),
                    "Name": rec.get("name", ""),
                    "Timestamp": formatted
                })
            df = pd.DataFrame(data)
            st.sidebar.markdown("### ๐ Data Grid")
            st.sidebar.dataframe(df)
        except Exception as e:
            # UI boundary: surface the problem in the sidebar instead of
            # aborting the Streamlit script run.
            st.sidebar.error(f"Data grid error: {str(e)}")
    else:
        st.sidebar.info("No container selected for data grid.")
|
| 514 |
|
| 515 |
# =============================================================================
|
| 516 |
+
# โโโโโโโโโโโโโ VIDEO & AUDIO UI FUNCTIONS โโโโโโโโโโโโโ
|
| 517 |
# =============================================================================
|
| 518 |
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
|
| 519 |
try:
|
|
|
|
| 629 |
st.error(f"Upload error: {str(e)}")
|
| 630 |
|
| 631 |
# =============================================================================
|
| 632 |
+
# โโโโโโโโโโโโโ AI SAMPLES SIDEBAR (Processed as a Python List) โโโโโโโโโโโโโ
|
| 633 |
+
# =============================================================================
|
| 634 |
+
def display_ai_samples():
    """Show a sidebar picker of sample queries and the selected sample's code.

    The samples are a fixed list of dicts with ``name``, ``description`` and
    ``query`` keys. The chosen sample's name and description are rendered as
    markdown, followed by its query in a code block. The code block is
    highlighted as SQL when the sample name contains ``"FullText"`` and as
    Python otherwise.
    """
    ai_samples = [
        {
            "name": "FullTextContains",
            "description": "Query using FullTextContains",
            "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")'
        },
        {
            "name": "FullTextContainsAll",
            "description": "Query using FullTextContainsAll",
            "query": 'SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.text, "red", "bicycle")'
        },
        {
            "name": "FullTextContainsAny",
            "description": "Query using FullTextContainsAny",
            "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "red") AND FullTextContainsAny(c.text, "bicycle", "skateboard")'
        },
        {
            "name": "FullTextScore",
            "description": "Query using FullTextScore (order by relevance)",
            "query": 'SELECT TOP 10 * FROM c ORDER BY RANK FullTextScore(c.text, ["bicycle", "mountain"])'
        },
        {
            "name": "Vector Search with Score",
            "description": "Example vector search snippet",
            "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5)\nfor result, score in results:\n print(result.json(), score)'
        },
        {
            "name": "Vector Search with Filtering",
            "description": "Example vector search with a filter",
            "query": 'pre_filter = {"conditions": [{"property": "metadata.page", "operator": "$eq", "value": 0}]}\nresults = vector_search.similarity_search_with_score(query="Your query", k=5, pre_filter=pre_filter)'
        },
        {
            "name": "Hybrid Search",
            "description": "Example hybrid search snippet",
            "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5, query_type=CosmosDBQueryType.HYBRID)'
        }
    ]
    st.sidebar.markdown("### ๐ค AI Samples")
    st.sidebar.info("๐ Get started with our AI samples! Time free access to get started today.")
    sample_names = [sample["name"] for sample in ai_samples]
    selected_sample_name = st.sidebar.selectbox("Select an AI Sample", sample_names)
    # Map the selected name back to its sample dict; None if nothing matches.
    selected_sample = next((s for s in ai_samples if s["name"] == selected_sample_name), None)
    if selected_sample:
        st.sidebar.markdown(f"**{selected_sample['name']}**: {selected_sample['description']}")
        # The FullText* samples are Cosmos SQL; the rest are Python snippets.
        lang = "sql" if "FullText" in selected_sample["name"] else "python"
        st.sidebar.code(selected_sample["query"], language=lang)
|
| 681 |
+
|
| 682 |
+
# =============================================================================
|
| 683 |
+
# โโโโโโโโโโโโโ NEW ITEM & FIELD FUNCTIONS
|
| 684 |
# =============================================================================
|
| 685 |
def new_item_default(container):
|
| 686 |
new_id = generate_unique_id()
|
|
|
|
| 723 |
st.error(f"Error adding field: {str(e)}")
|
| 724 |
|
| 725 |
# =============================================================================
|
| 726 |
+
# โโโโโโโโโโโโโ VECTOR SEARCH INTERFACE (Simple keyword search)
|
| 727 |
# =============================================================================
|
| 728 |
def vector_keyword_search(keyword, container):
|
| 729 |
try:
|
|
|
|
| 735 |
return []
|
| 736 |
|
| 737 |
# =============================================================================
|
| 738 |
+
# ------------- NEW AI MODALITY RECORD TEMPLATES
|
| 739 |
# =============================================================================
|
| 740 |
def new_ai_record(container):
|
| 741 |
new_id = generate_unique_id()
|
|
|
|
| 779 |
return None
|
| 780 |
|
| 781 |
# =============================================================================
|
| 782 |
+
# ------------- LANGCHAIN FUNCTIONS (Witty emoji comments)
|
| 783 |
# =============================================================================
|
| 784 |
def display_langchain_functions():
|
| 785 |
functions = [
|
|
|
|
| 792 |
st.sidebar.write(f"{func['name']}: {func['comment']}")
|
| 793 |
|
| 794 |
# =============================================================================
|
| 795 |
+
# ------------- OPTIONAL: SIDEBAR DATA GRID (Records with formatted timestamps)
|
|
|
|
| 796 |
# =============================================================================
|
| 797 |
+
# (This feature is now integrated above via show_sidebar_data_grid().)
|
| 798 |
+
|
| 799 |
+
# =============================================================================
|
| 800 |
+
# ------------- ASYNC TTS & ARXIV FUNCTIONS (Optional Features)
|
| 801 |
+
# =============================================================================
|
| 802 |
+
import asyncio
|
| 803 |
+
import edge_tts
|
| 804 |
+
from streamlit_marquee import streamlit_marquee
|
| 805 |
+
from collections import Counter
|
| 806 |
+
|
| 807 |
+
class PerformanceTimer:
    """Context manager that records wall-clock timing for a named operation.

    Attributes:
        operation_name: Label supplied by the caller.
        start_time: ``time.time()`` captured on entry; ``None`` before entry.
        elapsed: Seconds between entry and exit; ``None`` until the block exits.
    """

    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time = None
        self.elapsed = None

    def __enter__(self):
        # start_time stays on time.time() because callers compute
        # `time.time() - timer.start_time` while still inside the block.
        self.start_time = time.time()
        self.elapsed = None
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Record the duration even when the body raised.
        self.elapsed = time.time() - self.start_time
        # Never suppress exceptions raised inside the timed block.
        return False
|
| 816 |
+
|
| 817 |
+
async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
    """Synthesize *text* to an audio file with edge-tts, caching per request.

    Args:
        text: Text to speak; newlines are replaced by spaces and the result
            is stripped before use.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate: Rate offset, sent as a signed percentage (e.g. ``+0%``).
        pitch: Pitch offset, sent as a signed Hz value (e.g. ``+0Hz``).
        file_format: Extension used for the generated file name.

    Returns:
        ``(filename, seconds)`` for a fresh synthesis, ``(filename, 0)`` for
        a cache hit, and ``(None, 0)`` for empty text or on failure (the
        error is shown with ``st.error``).
    """
    import hashlib

    with PerformanceTimer("tts_generation") as timer:
        text = text.replace("\n", " ").strip()
        if not text:
            return None, 0

        # Key on a digest of the whole text: keying on a 100-character prefix
        # returned the wrong audio for texts sharing that prefix.
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        cache_key = f"{digest}_{voice}_{rate}_{pitch}_{file_format}"
        audio_cache = st.session_state.setdefault('audio_cache', {})
        cached = audio_cache.get(cache_key)
        # Only trust a cache entry whose file is still on disk.
        if cached and os.path.exists(cached):
            return cached, 0

        try:
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            # Random suffix keeps names distinct within the same second.
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
            await communicate.save(filename)
            audio_cache[cache_key] = filename
            return filename, time.time() - timer.start_time
        except Exception as e:
            st.error(f"Error generating audio: {str(e)}")
            return None, 0
|
| 837 |
+
|
| 838 |
+
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Run ``async_edge_tts_generate`` to completion and return only the file name.

    The coroutine's result is unwrapped when it is a tuple (its first item is
    returned); any other result is passed through unchanged.
    """
    outcome = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
    return outcome[0] if isinstance(outcome, tuple) else outcome
|
| 843 |
+
|
| 844 |
+
async def async_save_qa_with_audio(question: str, answer: str):
    """Save a Q&A pair as markdown and, unless disabled, as spoken audio.

    Audio is skipped when ``st.session_state['enable_audio']`` is falsy
    (it defaults to enabled).

    Returns:
        ``(md_file, audio_file, seconds_elapsed, 0)``; ``audio_file`` is
        ``None`` when audio is disabled.
    """
    with PerformanceTimer("qa_save") as timer:
        md_file = create_file(question, answer, "md")
        if not st.session_state.get('enable_audio', True):
            audio_file = None
        else:
            spoken = f"{question}\n\nAnswer: {answer}"
            tts_voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
            fmt = st.session_state.get('audio_format', "mp3")
            audio_file, _ = await async_edge_tts_generate(spoken, voice=tts_voice, file_format=fmt)
        return md_file, audio_file, time.time() - timer.start_time, 0
|
| 852 |
+
|
| 853 |
+
def save_qa_with_audio(question, answer, voice=None):
    """Write a Q&A pair to markdown and synthesize it to audio.

    Args:
        question: Question text.
        answer: Answer text.
        voice: TTS voice; when falsy, ``st.session_state['tts_voice']`` is
            used, defaulting to ``"en-US-AriaNeural"``.

    Returns:
        ``(md_file, audio_file)``.
    """
    voice = voice or st.session_state.get('tts_voice', "en-US-AriaNeural")
    md_file = create_file(question, answer, "md")
    spoken = f"{question}\n\nAnswer: {answer}"
    fmt = st.session_state.get('audio_format', "mp3")
    audio_file = speak_with_edge_tts(spoken, voice=voice, file_format=fmt)
    return md_file, audio_file
|
| 860 |
+
|
| 861 |
+
def play_and_download_audio(file_path, file_type="mp3"):
    """Render an audio player and a download link for an existing file.

    Does nothing when *file_path* is empty or the file does not exist.
    """
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    st.markdown(get_download_link(file_path, file_type=file_type), unsafe_allow_html=True)
|
| 866 |
+
|
| 867 |
+
def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
    """Build (and cache in session state) an HTML data-URI download link.

    Args:
        file_path: File whose bytes are embedded, base64-encoded, in the link.
        file_type: Selects MIME type and label: ``"mp3"``, ``"wav"`` or
            ``"md"``; anything else uses ``application/octet-stream``.

    Returns:
        The ``<a>`` element as a string, or ``""`` if the link could not be
        built (the error is shown with ``st.error``).
    """
    # Include file_type in the key: the link's MIME type depends on it, so
    # keying on the path alone could return a link built for another type.
    cache_key = f"dl_{file_path}_{file_type}"
    link_cache = st.session_state.setdefault('download_link_cache', {})
    if cache_key in link_cache:
        return link_cache[cache_key]

    # file_type -> (MIME type, link label)
    link_styles = {
        "mp3": ("audio/mpeg", "๐ต Download"),
        "wav": ("audio/wav", "๐ Download"),
        "md": ("text/markdown", "๐ Download"),
    }
    mime, label = link_styles.get(file_type, ("application/octet-stream", "Download"))

    try:
        with open(file_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        filename = os.path.basename(file_path)
        link = f'<a href="data:{mime};base64,{b64}" download="{filename}">{label} {filename}</a>'
        link_cache[cache_key] = link
        return link
    except Exception as e:
        st.error(f"Error creating download link: {str(e)}")
        return ""
|
| 888 |
|
| 889 |
# =============================================================================
|
| 890 |
+
# ------------- RESEARCH / ARXIV FUNCTIONS (Optional Features)
|
| 891 |
# =============================================================================
|
| 892 |
def parse_arxiv_refs(ref_text: str):
|
| 893 |
if not ref_text:
|
|
|
|
| 955 |
pdf_link = generate_pdf_link(url)
|
| 956 |
title_wc = len(title.split())
|
| 957 |
summary_wc = len(summary.split())
|
| 958 |
+
high_info_terms = [term for term in summary.split()[:5]]
|
| 959 |
terms_str = ", ".join(high_info_terms)
|
| 960 |
rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
|
| 961 |
mermaid_code = "```mermaid\nflowchart TD\n"
|
|
|
|
| 988 |
return "\n".join(md_parts)
|
| 989 |
|
| 990 |
# =============================================================================
|
| 991 |
+
# ---------------------------------------------------------
|
| 992 |
+
# MAIN AI LOOKUP FUNCTION (Optional Features)
|
| 993 |
# =============================================================================
|
| 994 |
+
def _ask_claude(claude_client, prompt):
    """Send *prompt* to Claude, render the reply in the page, and return its text."""
    response = claude_client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )
    reply = response.content[0].text
    st.write("Claude's reply ๐ง :")
    st.markdown(reply)
    return reply


def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    """Answer *q* with Claude, optionally enrich it with an Arxiv RAG search.

    Steps: ask Claude, save and play the reply; when ``useArxiv`` is true,
    query the Arxiv RAG Space with the question plus Claude's reply, save and
    play that result, render any parsed papers, then ask Claude a follow-up.

    Args:
        q: User question.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for interface compatibility; not read by this function.
        useArxiv: Run the Arxiv RAG search and the follow-up Claude call.
        useArxivAudio: When papers are found, also run a TTS generation.

    Returns:
        Claude's reply, or the combined question-and-references text when
        ``useArxiv`` is true.
    """
    start = time.time()
    # NOTE(review): defined but never sent to the model; presumably intended
    # as a system prompt — confirm before wiring it in.
    ai_constitution = """
You are a medical and machine learning review board expert...
"""
    # 1) Claude API call
    import anthropic
    claude_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY_3"))
    result = _ask_claude(claude_client, q)
    create_file(q, result, "md")
    _, audio_file = save_qa_with_audio(q, result)
    st.subheader("๐ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))

    if useArxiv:
        q = q + result
        st.write('Running Arxiv RAG with Claude inputs.')
        # Separate name for the gradio client: reusing `client` here replaced
        # the Anthropic client and broke the follow-up messages.create call.
        rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = rag_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
        result = f"๐ {q}\n\n{refs}"
        _, audio_file = save_qa_with_audio(q, result)
        st.subheader("๐ Main Response Audio")
        play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))

        papers = parse_arxiv_refs(refs)
        if papers:
            paper_links = create_paper_links_md(papers)
            create_file(q, paper_links, "md")
            st.markdown(paper_links)
            detailed_md = create_detailed_paper_md(papers)
            create_file(q, detailed_md, "md")
            st.markdown(detailed_md)
            if useArxivAudio:
                asyncio.run(async_edge_tts_generate("Sample text", st.session_state.get('tts_voice', "en-US-AriaNeural")))
            st.write("Displaying Papers:")
            # (Optional: call functions to display papers)
        else:
            st.warning("No papers found.")

        # NOTE(review): nesting reconstructed from a flattened listing; the
        # follow-up is kept in the Arxiv branch because its prompt refers to
        # the reference papers — confirm against the original file.
        _ask_claude(
            claude_client,
            q + '\n\nUse the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with working code.',
        )

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
|
| 1049 |
|
| 1050 |
# =============================================================================
|
| 1051 |
+
# ------------- MAIN FUNCTION -------------
|
|
|
|
| 1052 |
# =============================================================================
|
| 1053 |
def main():
|
| 1054 |
+
st.markdown("### ๐ GitCosmos - Cosmos & Git Hub")
|
| 1055 |
st.markdown(f"[๐ Portal]({CosmosDBUrl})")
|
|
|
|
| 1056 |
if "chat_history" not in st.session_state:
|
| 1057 |
st.session_state.chat_history = []
|
| 1058 |
st.session_state.setdefault("current_container", None)
|
| 1059 |
+
if Key:
|
| 1060 |
+
st.session_state.primary_key = Key
|
| 1061 |
+
st.session_state.logged_in = True
|
| 1062 |
+
else:
|
| 1063 |
+
st.error("Missing Cosmos Key ๐โ")
|
| 1064 |
+
return
|
| 1065 |
st.sidebar.markdown("## ๐ ๏ธ Item Management")
|
| 1066 |
if st.sidebar.button("New Item"):
|
| 1067 |
if st.session_state.get("current_container"):
|
|
|
|
| 1097 |
st.sidebar.code(json.dumps(res, indent=2), language="json")
|
| 1098 |
else:
|
| 1099 |
st.warning("No container selected for search!")
|
|
|
|
| 1100 |
show_sidebar_data_grid()
|
|
|
|
| 1101 |
display_langchain_functions()
|
|
|
|
| 1102 |
try:
|
| 1103 |
if st.session_state.get("client") is None:
|
| 1104 |
st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
|
| 1105 |
st.sidebar.title("๐ Navigator")
|
| 1106 |
databases = get_databases(st.session_state.client)
|
| 1107 |
selected_db = st.sidebar.selectbox("๐๏ธ DB", databases)
|
| 1108 |
+
st.markdown(CosmosDBUrl)
|
| 1109 |
if selected_db != st.session_state.get("selected_database"):
|
| 1110 |
st.session_state.selected_database = selected_db
|
| 1111 |
st.session_state.selected_container = None
|
|
|
|
| 1126 |
submitted = st.form_submit_button("Create Container")
|
| 1127 |
if submitted:
|
| 1128 |
analytical_ttl = -1 if new_analytical else None
|
| 1129 |
+
new_container = create_new_container(database, new_container_id, new_partition_key, analytical_storage_ttl=analytical_ttl)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1130 |
if new_container:
|
| 1131 |
st.success(f"Container '{new_container_id}' created.")
|
| 1132 |
default_id = generate_unique_id()
|
|
|
|
| 1230 |
st.write(log_entry)
|
| 1231 |
elif selected_view == 'Run AI':
|
| 1232 |
st.markdown("#### ๐ค Run AI")
|
|
|
|
| 1233 |
ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
|
| 1234 |
if st.button("Send"):
|
| 1235 |
st.session_state.last_query = ai_query
|
|
|
|
| 1330 |
st.session_state.selected_document_id = None
|
| 1331 |
st.session_state.current_index = 0
|
| 1332 |
st.rerun()
|
|
|
|
|
|
|
| 1333 |
show_sidebar_data_grid()
|
| 1334 |
|
| 1335 |
+
# Script entry point: run the app only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
| 1337 |
+
|
| 1338 |
+
|
| 1339 |
# =============================================================================
|
| 1340 |
+
# ------------- Additional Blank Lines for Spacing (~1500 lines total) -------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1341 |
#
|
| 1342 |
#
|
| 1343 |
#
|
|
|
|
| 1462 |
#
|
| 1463 |
#
|
| 1464 |
#
|
| 1465 |
+
# =============================================================================
|
| 1466 |
+
# ------------- OPTIONAL FEATURES (New RunAI / Arxiv Search & Voice UI) -------------
|
| 1467 |
+
# The following block includes the optional ARXIV/RunAI functions (copied from the second app).
|
| 1468 |
+
# Uncomment and enable as desired.
|
| 1469 |
+
#
|
| 1470 |
+
# import streamlit as st
|
| 1471 |
+
# import anthropic
|
| 1472 |
+
# import openai
|
| 1473 |
+
# import base64
|
| 1474 |
+
# import cv2
|
| 1475 |
+
# import glob
|
| 1476 |
+
# import json
|
| 1477 |
+
# import math
|
| 1478 |
+
# import os
|
| 1479 |
+
# import pytz
|
| 1480 |
+
# import random
|
| 1481 |
+
# import re
|
| 1482 |
+
# import requests
|
| 1483 |
+
# # import textract
|
| 1484 |
+
# import time
|
| 1485 |
+
# import zipfile
|
| 1486 |
+
# import plotly.graph_objects as go
|
| 1487 |
+
# import streamlit.components.v1 as components
|
| 1488 |
+
# from datetime import datetime
|
| 1489 |
+
# from audio_recorder_streamlit import audio_recorder
|
| 1490 |
+
# from bs4 import BeautifulSoup
|
| 1491 |
+
# from collections import defaultdict, deque, Counter
|
| 1492 |
+
# from dotenv import load_dotenv
|
| 1493 |
+
# from gradio_client import Client
|
| 1494 |
+
# from huggingface_hub import InferenceClient
|
| 1495 |
+
# from io import BytesIO
|
| 1496 |
+
# from PIL import Image
|
| 1497 |
+
# from PyPDF2 import PdfReader
|
| 1498 |
+
# from urllib.parse import quote
|
| 1499 |
+
# from xml.etree import ElementTree as ET
|
| 1500 |
+
# from openai import OpenAI
|
| 1501 |
+
# import extra_streamlit_components as stx
|
| 1502 |
+
# from streamlit.runtime.scriptrunner import get_script_run_ctx
|
| 1503 |
+
# import asyncio
|
| 1504 |
+
# import edge_tts
|
| 1505 |
+
# from streamlit_marquee import streamlit_marquee
|
| 1506 |
+
# from typing import Tuple, Optional
|
| 1507 |
+
# import pandas as pd
|
| 1508 |
+
#
|
| 1509 |
+
# import nest_asyncio
|
| 1510 |
+
# nest_asyncio.apply()
|
| 1511 |
+
#
|
| 1512 |
+
# st.set_page_config(
|
| 1513 |
+
# page_title="๐ฒTalkingAIResearcher๐",
|
| 1514 |
+
# page_icon="๐ฒ๐",
|
| 1515 |
+
# layout="wide",
|
| 1516 |
+
# initial_sidebar_state="auto",
|
| 1517 |
+
# menu_items={
|
| 1518 |
+
# 'Get Help': 'https://huggingface.co/awacke1',
|
| 1519 |
+
# 'Report a bug': 'https://huggingface.co/spaces/awacke1',
|
| 1520 |
+
# 'About': "๐ฒTalkingAIResearcher๐"
|
| 1521 |
+
# }
|
| 1522 |
+
# )
|
| 1523 |
+
# load_dotenv()
|
| 1524 |
+
#
|
| 1525 |
+
# EDGE_TTS_VOICES = [
|
| 1526 |
+
# "en-US-AriaNeural",
|
| 1527 |
+
# "en-US-GuyNeural",
|
| 1528 |
+
# "en-US-JennyNeural",
|
| 1529 |
+
# "en-GB-SoniaNeural",
|
| 1530 |
+
# "en-GB-RyanNeural",
|
| 1531 |
+
# "en-AU-NatashaNeural",
|
| 1532 |
+
# "en-AU-WilliamNeural",
|
| 1533 |
+
# "en-CA-ClaraNeural",
|
| 1534 |
+
# "en-CA-LiamNeural"
|
| 1535 |
+
# ]
|
| 1536 |
+
#
|
| 1537 |
+
# # (Plus additional setup and functions as shown in the snippet above.)
|
| 1538 |
+
#
|
| 1539 |
+
# End of optional features block.
|