NH-Korea

Running

App Files Community

ginipick committed on Apr 26

Commit

f77229e

verified ·

1 Parent(s): 1710ad7

Update app.py

Browse files

Files changed (1) hide show

app.py +444 -232

app.py CHANGED Viewed

@@ -6,11 +6,12 @@ from io import BytesIO
 from PIL import Image
 import streamlit as st
-from openai import OpenAI  # OpenAI 라이브러리
 from gradio_client import Client
 import pandas as pd
 import PyPDF2  # For handling PDF files
 # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
@@ -20,28 +21,32 @@ BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
 BRAVE_NEWS_ENDPOINT  = "https://api.search.brave.com/res/v1/news/search"
 IMAGE_API_URL  = "http://211.233.58.201:7896"
 MAX_TOKENS     = 7999
-# Brave Search modes and style definitions (in English)
-SEARCH_MODES = {
-    "comprehensive": "Comprehensive answer with multiple sources",
-    "academic": "Academic and research-focused results",
-    "news": "Latest news and current events",
-    "technical": "Technical and specialized information",
-    "educational": "Educational and learning resources"
 }
 RESPONSE_STYLES = {
-    "professional": "Professional and formal tone",
-    "casual": "Friendly and conversational tone",
-    "simple": "Simple and easy to understand",
-    "detailed": "Detailed and thorough explanations"
 }
 # Example search queries
 EXAMPLE_QUERIES = {
-    "example1": "What are the latest developments in quantum computing?",
-    "example2": "How does climate change affect biodiversity in tropical rainforests?",
-    "example3": "What are the economic implications of artificial intelligence in the job market?"
 }
 # ──────────────────────────────── Logging ────────────────────────────────
@@ -60,105 +65,289 @@ def get_openai_client():
         timeout=60.0,
         max_retries=3
     )
 # ──────────────────────────────── System Prompt ─────────────────────────
-def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
     """
-    Generate a system prompt for the 'Perplexity Clone' interface based on:
-    - The selected search mode and style
-    - Guidelines for using web search results and uploaded files
     """
-    comprehensive_prompt = """
-You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
-Your task is to:
-1. Thoroughly analyze the user's query
-2. Provide a clear, well-structured answer integrating information from multiple sources
-3. Include relevant videos, and links in your response
-4. Format your answer with proper headings, bullet points, and sections
-5. Cite sources inline and provide a references section at the end
-Important guidelines:
-- Organize information logically with clear section headings
-- Use bullet points and numbered lists for clarity
-- Include specific, factual information whenever possible
-- Provide balanced perspectives on controversial topics
-- Display relevant statistics, data, or quotes when appropriate
-- Format your response using markdown for readability
 """
     mode_prompts = {
-        "academic": """
-Your focus is on providing academic and research-focused responses:
-- Prioritize peer-reviewed research and academic sources
-- Include citations in a formal academic format
-- Discuss methodologies and research limitations where relevant
-- Present different scholarly perspectives on the topic
-- Use precise, technical language appropriate for an academic audience
 """,
-        "news": """
-Your focus is on providing the latest news and current events:
-- Prioritize recent news articles and current information
-- Include publication dates for all news sources
-- Present multiple perspectives from different news outlets
-- Distinguish between facts and opinions/editorial content
-- Update information with the most recent developments
 """,
-        "technical": """
-Your focus is on providing technical and specialized information:
-- Use precise technical terminology appropriate to the field
-- Include code snippets, formulas, or technical diagrams where relevant
-- Break down complex concepts into step-by-step explanations
-- Reference technical documentation, standards, and best practices
-- Consider different technical approaches or methodologies
 """,
-        "educational": """
-Your focus is on providing educational and learning resources:
-- Structure information in a learning-friendly progression
-- Include examples, analogies, and visual explanations
-- Highlight key concepts and definitions
-- Suggest further learning resources at different difficulty levels
-- Present information that's accessible to learners at various levels
 """
     }
     style_guides = {
-        "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
-        "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
-        "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
-        "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
     }
     search_guide = """
-Guidelines for Using Search Results:
-- Include source links directly in your response using markdown: [Source Name](URL)
-- For each major claim or piece of information, indicate its source
-- If sources conflict, explain the different perspectives and their reliability
-- Include relevant video links when appropriate by writing: [Video: Title](video_url)
-- Format search information into a cohesive, well-structured response
-- Include a "References" section at the end listing all major sources with links
 """
     upload_guide = """
-Guidelines for Using Uploaded Files:
-- Treat the uploaded files as primary sources for your response
-- Extract and highlight key information from files that directly addresses the query
-- Quote relevant passages and cite the specific file
-- For numerical data in CSV files, consider creating summary statements
-- For PDF content, reference specific sections or pages
-- Integrate file information seamlessly with web search results
-- When information conflicts, prioritize file content over general web results
 """
     # Base prompt
-    if mode == "comprehensive":
-        final_prompt = comprehensive_prompt
-    else:
-        final_prompt = comprehensive_prompt + "\n" + mode_prompts.get(mode, "")
     # Style
     if style in style_guides:
-        final_prompt += f"\n\nTone and Style: {style_guides[style]}"
     if include_search_results:
         final_prompt += f"\n\n{search_guide}"
@@ -167,23 +356,24 @@ Guidelines for Using Uploaded Files:
         final_prompt += f"\n\n{upload_guide}"
     final_prompt += """
-\n\nAdditional Formatting Requirements:
-- Use markdown headings (## and ###) to organize your response
-- Use bold text (**text**) for emphasis on important points
-- Include a "Related Questions" section at the end with 3-5 follow-up questions
-- Format your response with proper spacing and paragraph breaks
-- Make all links clickable by using proper markdown format: [text](url)
 """
     return final_prompt
 # ──────────────────────────────── Brave Search API ────────────────────────
 @st.cache_data(ttl=3600)
-def brave_search(query: str, count: int = 20):
     if not BRAVE_KEY:
         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
     headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
-    params = {"q": query, "count": str(count)}
     for attempt in range(3):
         try:
@@ -221,12 +411,12 @@ def brave_search(query: str, count: int = 20):
     return []
 @st.cache_data(ttl=3600)
-def brave_video_search(query: str, count: int = 5):
     if not BRAVE_KEY:
         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
     headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
-    params = {"q": query, "count": str(count)}
     for attempt in range(3):
         try:
@@ -255,12 +445,12 @@ def brave_video_search(query: str, count: int = 5):
     return []
 @st.cache_data(ttl=3600)
-def brave_news_search(query: str, count: int = 5):
     if not BRAVE_KEY:
         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
     headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
-    params = {"q": query, "count": str(count)}
     for attempt in range(3):
         try:
@@ -291,19 +481,19 @@ def brave_news_search(query: str, count: int = 5):
 def mock_results(query: str) -> str:
     ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    return (f"# Fallback Search Content (Generated: {ts})\n\n"
-            f"The search API request failed or returned no results for '{query}'. "
-            f"Please generate a response based on any pre-existing knowledge.\n\n"
-            f"Consider these points:\n\n"
-            f"- Basic concepts and importance of {query}\n"
-            f"- Commonly known related statistics or trends\n"
-            f"- Typical expert opinions on this subject\n"
-            f"- Questions that readers might have\n\n"
-            f"Note: This is fallback guidance, not real-time data.\n\n")
 def do_web_search(query: str) -> str:
     try:
-        arts = brave_search(query, 20)
         if not arts:
             logging.warning("No search results, using fallback content")
             return mock_results(query)
@@ -311,26 +501,26 @@ def do_web_search(query: str) -> str:
         videos = brave_video_search(query, 2)
         news   = brave_news_search(query, 3)
-        result = "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources.\n\n"
-        result += "## Web Results\n\n"
-        for a in arts[:10]:
-            result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
-            result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
         if videos:
-            result += "## Video Results\n\n"
             for vid in videos:
                 result += f"### {vid['title']}\n\n"
                 if vid.get('thumbnail_url'):
-                    result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
-                result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
-        if news:
-            result += "## News Results\n\n"
-            for n in news:
-                result += f"### {n['title']}\n\n{n['description']}\n\n"
-                result += f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
         return result
@@ -348,11 +538,11 @@ def process_text_file(file):
         if len(text) > 10000:
             text = text[:9700] + "...(truncated)..."
-        result = f"## Text File: {file.name}\n\n" + text
         return result
     except Exception as e:
         logging.error(f"Error processing text file: {str(e)}")
-        return f"Error processing text file: {str(e)}"
 def process_csv_file(file):
     try:
@@ -360,41 +550,41 @@ def process_csv_file(file):
         file.seek(0)
         df = pd.read_csv(io.BytesIO(content))
-        result = f"## CSV File: {file.name}\n\n"
-        result += f"- Rows: {len(df)}\n"
-        result += f"- Columns: {len(df.columns)}\n"
-        result += f"- Column Names: {', '.join(df.columns.tolist())}\n\n"
-        result += "### Data Preview\n\n"
         preview_df = df.head(10)
         try:
             markdown_table = preview_df.to_markdown(index=False)
             if markdown_table:
                 result += markdown_table + "\n\n"
             else:
-                result += "Unable to display CSV data.\n\n"
         except Exception as e:
             logging.error(f"Markdown table conversion error: {e}")
-            result += "Displaying data as text:\n\n" + str(preview_df) + "\n\n"
         num_cols = df.select_dtypes(include=['number']).columns
         if len(num_cols) > 0:
-            result += "### Basic Statistical Information\n\n"
             try:
                 stats_df = df[num_cols].describe().round(2)
                 stats_markdown = stats_df.to_markdown()
                 if stats_markdown:
                     result += stats_markdown + "\n\n"
                 else:
-                    result += "Unable to display statistical information.\n\n"
             except Exception as e:
                 logging.error(f"Statistical info conversion error: {e}")
-                result += "Unable to generate statistical information.\n\n"
         return result
     except Exception as e:
         logging.error(f"CSV file processing error: {str(e)}")
-        return f"Error processing CSV file: {str(e)}"
 def process_pdf_file(file):
     try:
@@ -404,7 +594,7 @@ def process_pdf_file(file):
         pdf_file = io.BytesIO(file_bytes)
         reader = PyPDF2.PdfReader(pdf_file, strict=False)
-        result = f"## PDF File: {file.name}\n\n- Total pages: {len(reader.pages)}\n\n"
         max_pages = min(5, len(reader.pages))
         all_text = ""
@@ -413,40 +603,40 @@ def process_pdf_file(file):
             try:
                 page = reader.pages[i]
                 page_text = page.extract_text()
-                current_page_text = f"### Page {i+1}\n\n"
                 if page_text and len(page_text.strip()) > 0:
                     if len(page_text) > 1500:
-                        current_page_text += page_text[:1500] + "...(truncated)...\n\n"
                     else:
                         current_page_text += page_text + "\n\n"
                 else:
-                    current_page_text += "(No text could be extracted)\n\n"
                 all_text += current_page_text
                 if len(all_text) > 8000:
-                    all_text += "...(truncating remaining pages)...\n\n"
                     break
             except Exception as page_err:
                 logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
-                all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"
         if len(reader.pages) > max_pages:
-            all_text += f"\nNote: Only the first {max_pages} pages are shown.\n\n"
-        result += "### PDF Content\n\n" + all_text
         return result
     except Exception as e:
         logging.error(f"PDF file processing error: {str(e)}")
-        return f"## PDF File: {file.name}\n\nError: {str(e)}\n\nCannot process."
 def process_uploaded_files(files):
     if not files:
         return None
-    result = "# Uploaded File Contents\n\nBelow is the content from the files provided by the user.\n\n"
     for file in files:
         try:
             ext = file.name.split('.')[-1].lower()
@@ -457,10 +647,10 @@ def process_uploaded_files(files):
             elif ext == 'pdf':
                 result += process_pdf_file(file) + "\n\n---\n\n"
             else:
-                result += f"### Unsupported File: {file.name}\n\n---\n\n"
         except Exception as e:
             logging.error(f"File processing error {file.name}: {e}")
-            result += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"
     return result
@@ -488,8 +678,8 @@ def extract_image_prompt(response_text: str, topic: str):
         response = client.chat.completions.create(
             model="gpt-4.1-mini",
             messages=[
-                {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
-                {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
             ],
             temperature=1,
             max_tokens=80,
@@ -498,9 +688,9 @@ def extract_image_prompt(response_text: str, topic: str):
         return response.choices[0].message.content.strip()
     except Exception as e:
         logging.error(f"OpenAI image prompt generation error: {e}")
-        return f"A professional photo related to {topic}, high quality"
-def md_to_html(md: str, title="Perplexity Clone Response"):
     return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
 def keywords(text: str, top=5):
@@ -508,8 +698,9 @@ def keywords(text: str, top=5):
     return " ".join(cleaned.split()[:top])
 # ──────────────────────────────── Streamlit UI ────────────────────────────
-def perplexity_app():
-    st.title("Perplexity Clone AI Assistant")
     if "ai_model" not in st.session_state:
         st.session_state.ai_model = "gpt-4.1-mini"
@@ -521,48 +712,61 @@ def perplexity_app():
         st.session_state.generate_image = False
     if "web_search_enabled" not in st.session_state:
         st.session_state.web_search_enabled = True
-    if "search_mode" not in st.session_state:
-        st.session_state.search_mode = "comprehensive"
     if "response_style" not in st.session_state:
         st.session_state.response_style = "professional"
     sb = st.sidebar
-    sb.title("Search Settings")
-    sb.subheader("Response Configuration")
     sb.selectbox(
-        "Search Mode",
-        options=list(SEARCH_MODES.keys()),
-        format_func=lambda x: SEARCH_MODES[x],
-        key="search_mode"
     )
     sb.selectbox(
-        "Response Style",
         options=list(RESPONSE_STYLES.keys()),
         format_func=lambda x: RESPONSE_STYLES[x],
         key="response_style"
     )
     # Example queries
-    sb.subheader("Example Queries")
     c1, c2, c3 = sb.columns(3)
-    if c1.button("Quantum Computing", key="ex1"):
         process_example(EXAMPLE_QUERIES["example1"])
-    if c2.button("Climate Change", key="ex2"):
         process_example(EXAMPLE_QUERIES["example2"])
-    if c3.button("AI Economics", key="ex3"):
         process_example(EXAMPLE_QUERIES["example3"])
-    sb.subheader("Other Settings")
-    sb.toggle("Auto Save", key="auto_save")
-    sb.toggle("Auto Image Generation", key="generate_image")
-    web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
     st.session_state.web_search_enabled = web_search_enabled
     if web_search_enabled:
-        st.sidebar.info("✅ Web search results will be integrated into the response.")
     # Download the latest response
     latest_response = next(
@@ -578,35 +782,35 @@ def perplexity_app():
             first_line = latest_response.split('\n', 1)[0].strip()
             title = first_line[:40] + "..." if len(first_line) > 40 else first_line
-        sb.subheader("Download Latest Response")
         d1, d2 = sb.columns(2)
-        d1.download_button("Download as Markdown", latest_response,
                            file_name=f"{title}.md", mime="text/markdown")
-        d2.download_button("Download as HTML", md_to_html(latest_response, title),
                            file_name=f"{title}.html", mime="text/html")
     # JSON conversation record upload
-    up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
     if up:
         try:
             st.session_state.messages = json.load(up)
-            sb.success("Conversation history loaded successfully")
         except Exception as e:
-            sb.error(f"Failed to load: {e}")
     # JSON conversation record download
-    if sb.button("Download Conversation as JSON"):
         sb.download_button(
-            "Save",
             data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
             file_name="conversation_history.json",
             mime="application/json"
         )
     # File Upload
-    st.subheader("Upload Files")
     uploaded_files = st.file_uploader(
-        "Upload files to be used as reference (txt, csv, pdf)",
         type=["txt", "csv", "pdf"],
         accept_multiple_files=True,
         key="file_uploader"
@@ -614,18 +818,18 @@ def perplexity_app():
     if uploaded_files:
         file_count = len(uploaded_files)
-        st.success(f"{file_count} files uploaded. They will be used as sources for your query.")
-        with st.expander("Preview Uploaded Files", expanded=False):
             for idx, file in enumerate(uploaded_files):
-                st.write(f"**File Name:** {file.name}")
                 ext = file.name.split('.')[-1].lower()
                 if ext == 'txt':
                     preview = file.read(1000).decode('utf-8', errors='ignore')
                     file.seek(0)
                     st.text_area(
-                        f"Preview of {file.name}",
                         preview + ("..." if len(preview) >= 1000 else ""),
                         height=150
                     )
@@ -633,10 +837,10 @@ def perplexity_app():
                     try:
                         df = pd.read_csv(file)
                         file.seek(0)
-                        st.write("CSV Preview (up to 5 rows)")
                         st.dataframe(df.head(5))
                     except Exception as e:
-                        st.error(f"CSV preview failed: {e}")
                 elif ext == 'pdf':
                     try:
                         file_bytes = file.read()
@@ -646,17 +850,17 @@ def perplexity_app():
                         reader = PyPDF2.PdfReader(pdf_file, strict=False)
                         pc = len(reader.pages)
-                        st.write(f"PDF File: {pc} pages")
                         if pc > 0:
                             try:
                                 page_text = reader.pages[0].extract_text()
-                                preview = page_text[:500] if page_text else "(No text extracted)"
-                                st.text_area("Preview of the first page", preview + "...", height=150)
                             except:
-                                st.warning("Failed to extract text from the first page")
                     except Exception as e:
-                        st.error(f"PDF preview failed: {e}")
                 if idx < file_count - 1:
                     st.divider()
@@ -668,9 +872,9 @@ def perplexity_app():
             # Videos
             if "videos" in m and m["videos"]:
-                st.subheader("Related Videos")
                 for video in m["videos"]:
-                    video_title = video.get('title', 'Related video')
                     video_url   = video.get('url', '')
                     thumbnail   = video.get('thumbnail', '')
@@ -680,13 +884,13 @@ def perplexity_app():
                             st.write("🎬")
                         with col2:
                             st.markdown(f"**[{video_title}]({video_url})**")
-                            st.write(f"Source: {video.get('source', 'Unknown')}")
                     else:
                         st.markdown(f"🎬 **[{video_title}]({video_url})**")
-                        st.write(f"Source: {video.get('source', 'Unknown')}")
     # User input
-    query = st.chat_input("Enter your query or question here.")
     if query:
         process_input(query, uploaded_files)
@@ -712,8 +916,8 @@ def process_input(query: str, uploaded_files):
         has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
         try:
-            status = st.status("Preparing to answer your query...")
-            status.update(label="Initializing client...")
             client = get_openai_client()
@@ -721,22 +925,27 @@ def process_input(query: str, uploaded_files):
             video_results = []
             news_results = []
             if use_web_search:
-                status.update(label="Performing web search...")
-                with st.spinner("Searching the web..."):
                     search_content = do_web_search(keywords(query, top=5))
                 try:
-                    status.update(label="Finding videos...")
                     video_results = brave_video_search(query, 2)
                     news_results  = brave_news_search(query, 3)
                 except Exception as search_err:
-                    logging.error(f"Media search error: {search_err}")
             file_content = None
             if has_uploaded_files:
-                status.update(label="Processing uploaded files...")
-                with st.spinner("Analyzing files..."):
                     file_content = process_uploaded_files(uploaded_files)
             valid_videos = []
@@ -745,14 +954,14 @@ def process_input(query: str, uploaded_files):
                 if url and url.startswith('http'):
                     valid_videos.append({
                         'url': url,
-                        'title': vid.get('title', 'Video'),
                         'thumbnail': vid.get('thumbnail_url', ''),
-                        'source': vid.get('source', 'Video source')
                     })
-            status.update(label="Preparing comprehensive answer...")
             sys_prompt = get_system_prompt(
-                mode=st.session_state.search_mode,
                 style=st.session_state.response_style,
                 include_search_results=use_web_search,
                 include_uploaded_files=has_uploaded_files
@@ -763,13 +972,16 @@ def process_input(query: str, uploaded_files):
             ]
             user_content = query
             if search_content:
                 user_content += "\n\n" + search_content
             if file_content:
                 user_content += "\n\n" + file_content
             if valid_videos:
-                user_content += "\n\n# Available Videos\n"
                 for i, vid in enumerate(valid_videos):
                     user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
@@ -794,15 +1006,15 @@ def process_input(query: str, uploaded_files):
                 message_placeholder.markdown(full_response, unsafe_allow_html=True)
                 if valid_videos:
-                    st.subheader("Related Videos")
                     for video in valid_videos:
-                        video_title = video.get('title', 'Related video')
                         video_url   = video.get('url', '')
                         st.markdown(f"🎬 **[{video_title}]({video_url})**")
-                        st.write(f"Source: {video.get('source', 'Unknown')}")
-                status.update(label="Response completed!", state="complete")
                 st.session_state.messages.append({
                     "role": "assistant",
@@ -812,27 +1024,27 @@ def process_input(query: str, uploaded_files):
             except Exception as api_error:
                 error_message = str(api_error)
-                logging.error(f"API error: {error_message}")
-                status.update(label=f"Error: {error_message}", state="error")
-                raise Exception(f"Response generation error: {error_message}")
             if st.session_state.generate_image and full_response:
-                with st.spinner("Generating custom image..."):
                     try:
                         ip = extract_image_prompt(full_response, query)
                         img, cap = generate_image(ip)
                         if img:
-                            st.subheader("AI-Generated Image")
                             st.image(img, caption=cap, use_container_width=True)
                     except Exception as img_error:
-                        logging.error(f"Image generation error: {str(img_error)}")
-                        st.warning("Custom image generation failed.")
             if full_response:
-                st.subheader("Download This Response")
                 c1, c2 = st.columns(2)
                 c1.download_button(
-                    "Markdown",
                     data=full_response,
                     file_name=f"{query[:30]}.md",
                     mime="text/markdown"
@@ -850,19 +1062,19 @@ def process_input(query: str, uploaded_files):
                     with open(fn, "w", encoding="utf-8") as fp:
                         json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
                 except Exception as e:
-                    logging.error(f"Auto-save failed: {e}")
         except Exception as e:
             error_message = str(e)
-            placeholder.error(f"An error occurred: {error_message}")
-            logging.error(f"Process input error: {error_message}")
-            ans = f"An error occurred while processing your request: {error_message}"
             st.session_state.messages.append({"role": "assistant", "content": ans})
 # ──────────────────────────────── main ────────────────────────────────────
 def main():
-    st.write("==== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
-    perplexity_app()
 if __name__ == "__main__":
     main()

 from PIL import Image
 import streamlit as st
+from openai import OpenAI
 from gradio_client import Client
 import pandas as pd
 import PyPDF2  # For handling PDF files
+import kagglehub
 # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 BRAVE_NEWS_ENDPOINT  = "https://api.search.brave.com/res/v1/news/search"
 IMAGE_API_URL  = "http://211.233.58.201:7896"
 MAX_TOKENS     = 7999
# Kaggle credential is supplied through the KDATA_API environment variable.
KAGGLE_API_KEY = os.getenv("KDATA_API", "")
# Export the key under the name the Kaggle tooling reads, but only when one was
# actually provided: writing an empty string would clobber a KAGGLE_KEY that
# the deployment environment may already have configured.
if KAGGLE_API_KEY:
    os.environ["KAGGLE_KEY"] = KAGGLE_API_KEY

# Analysis modes offered in the sidebar: internal key -> user-facing label.
ANALYSIS_MODES = {
    "price_forecast": "농산물 가격 예측과 시장 분석",
    "market_trend": "시장 동향 및 수요 패턴 분석",
    "production_analysis": "생산량 분석 및 식량 안보 전망",
    "agricultural_policy": "농업 정책 및 규제 영향 분석",
    "climate_impact": "기후 변화가 농업에 미치는 영향 분석",
}

# Response styles offered in the sidebar: internal key -> user-facing label.
RESPONSE_STYLES = {
    "professional": "전문적이고 학술적인 분석",
    "simple": "쉽게 이해할 수 있는 간결한 설명",
    "detailed": "상세한 통계 기반 깊이 있는 분석",
    "action_oriented": "실행 가능한 조언과 추천 중심",
}

# Example search queries wired to the sidebar example buttons.
EXAMPLE_QUERIES = {
    "example1": "쌀 가격 추세 및 향후 6개월 전망을 분석해주세요",
    "example2": "기후 변화는 한국 채소 생산에 어떤 영향을 미치나요?",
    "example3": "세계 곡물 시장 동향과 국내 식량 안보에 미치는 영향은?",
}
 # ──────────────────────────────── Logging ────────────────────────────────
         timeout=60.0,
         max_retries=3
     )
+# ────────────────────────────── Kaggle Dataset Access ──────────────────────
@st.cache_resource
def _download_agriculture_dataset():
    """Download the UN agriculture dataset and index its CSV files.

    Raises whatever the download or the directory scan raises. Letting the
    exception escape (instead of returning ``None``) keeps a failed attempt
    out of the Streamlit resource cache, so a later call can retry.
    """
    path = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")
    logging.info(f"Kaggle dataset downloaded to: {path}")

    # Collect metadata about every CSV file found below the download directory.
    available_files = []
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            # Case-insensitive so files named e.g. "DATA.CSV" are not skipped.
            if not file_name.lower().endswith('.csv'):
                continue
            file_path = os.path.join(root, file_name)
            size_mb = os.path.getsize(file_path) / (1024 * 1024)  # bytes -> MB
            available_files.append({
                'name': file_name,
                'path': file_path,
                'size_mb': round(size_mb, 2)
            })

    # os.walk order depends on the filesystem; sort so that callers which take
    # "the first N files" get the same files on every machine and every run.
    available_files.sort(key=lambda info: info['path'])

    return {
        'base_path': path,
        'files': available_files
    }


def load_agriculture_dataset():
    """Download and load the UN agriculture dataset from Kaggle.

    Returns:
        A dict with ``'base_path'`` (the download directory) and ``'files'``
        (a list of ``{'name', 'path', 'size_mb'}`` dicts, one per CSV file),
        or ``None`` when the dataset could not be downloaded or scanned.

    Only successful results are cached: the cached helper raises on failure
    and the error is converted to ``None`` here, outside the cache, so a
    transient Kaggle/network error does not disable the dataset for the
    lifetime of the process.
    """
    try:
        return _download_agriculture_dataset()
    except Exception as e:
        logging.error(f"Error loading Kaggle dataset: {e}")
        return None
def get_dataset_summary():
    """Build a Markdown overview of the agriculture dataset.

    Returns:
        A Markdown string with the CSV file count, the first ten file names
        with their sizes, and a five-row sample plus a column list taken from
        the first file. When the dataset cannot be loaded, a short failure
        message is returned instead.
    """
    info = load_agriculture_dataset()
    if not info:
        return "Failed to load the UN global food and agriculture statistics dataset."

    csv_files = info['files']
    total = len(csv_files)

    sections = [
        "# UN 글로벌 식량 및 농업 통계 데이터셋\n\n",
        f"총 {total}개의 CSV 파일이 포함되어 있습니다.\n\n",
        "## 사용 가능한 데이터 파일:\n\n",
    ]

    # Only the first ten files are listed; the remainder is summarised by count.
    sections.extend(
        f"{position}. **{entry['name']}** ({entry['size_mb']} MB)\n"
        for position, entry in enumerate(csv_files[:10], 1)
    )
    if total > 10:
        sections.append(f"\n...외 {total - 10}개 파일\n")

    # Illustrate the data layout using the first file. Any failure here is
    # logged and reported inline; text appended before the failure is kept.
    try:
        if csv_files:
            sample = pd.read_csv(csv_files[0]['path'], nrows=5)
            sections.append("\n## 데이터 샘플 구조:\n\n")
            sections.append(sample.head(5).to_markdown() + "\n\n")
            sections.append("## 데이터셋 변수 설명:\n\n")
            sections.extend(f"- **{column}**: [변수 설명 필요]\n" for column in sample.columns)
    except Exception as e:
        logging.error(f"Error generating dataset sample: {e}")
        sections.append("\n데이터 샘플을 생성하는 중 오류가 발생했습니다.\n")

    return "".join(sections)
# Korean query terms mapped to the English terms that may appear in the
# dataset's file names.
_QUERY_KEYWORDS = {
    "쌀": ["rice", "grain"],
    "밀": ["wheat", "grain"],
    "옥수수": ["corn", "maize", "grain"],
    "채소": ["vegetable", "produce"],
    "과일": ["fruit", "produce"],
    "가격": ["price", "cost", "value"],
    "생산": ["production", "yield", "harvest"],
    "수출": ["export", "trade"],
    "수입": ["import", "trade"],
    "소비": ["consumption", "demand"]
}


def _match_query_keywords(query):
    """Return the ordered, de-duplicated keywords triggered by ``query``.

    A keyword group is triggered when its Korean term occurs in the query or
    when one of its English terms occurs as a whole word (a trailing plural
    "s" is tolerated), so English queries are matched as well.
    """
    import re  # local import: only this helper needs it

    query_lower = query.lower()
    words = set(re.findall(r"[a-z]+", query_lower))
    # Whole-word matching avoids hits such as "import" inside "important".
    words |= {word[:-1] for word in words if word.endswith("s")}

    matched = []
    for k_term, e_terms in _QUERY_KEYWORDS.items():
        if k_term in query_lower or words.intersection(e_terms):
            matched.extend([k_term] + e_terms)
    # dict.fromkeys removes duplicates (e.g. "grain") while keeping a stable
    # order, unlike set(), whose string ordering changes between runs.
    return list(dict.fromkeys(matched))


def _select_relevant_files(files, found_keywords):
    """Pick files whose name contains a keyword; fall back to the first five."""
    if found_keywords:
        needles = [keyword.lower() for keyword in found_keywords]
        matches = [
            file_info for file_info in files
            if any(needle in file_info['name'].lower() for needle in needles)
        ]
        if matches:
            return matches
    return files[:5]


def analyze_dataset_for_query(query):
    """Find and analyze relevant data from the dataset based on the query.

    Args:
        query: The user's question (Korean or English).

    Returns:
        A Markdown report containing the matched keywords and, for at most
        three relevant CSV files, row/column counts, a five-row sample, the
        ``describe()`` statistics of numeric columns and a note when a
        year/date column exists. If the dataset cannot be loaded, a short
        error message is returned instead. Failures on a single file are
        logged and reported inline without aborting the other files.
    """
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "데이터셋을 불러올 수 없습니다. Kaggle API 연결을 확인해주세요."

    found_keywords = _match_query_keywords(query)
    relevant_files = _select_relevant_files(dataset_info['files'], found_keywords)

    parts = [
        "# 농업 데이터 분석 결과\n\n",
        f"쿼리: '{query}'에 대한 분석을 수행했습니다.\n\n",
    ]
    if found_keywords:
        parts.append(f"## 분석 키워드: {', '.join(found_keywords)}\n\n")

    # Process each relevant file (limited to 3 files for performance).
    for file_info in relevant_files[:3]:
        try:
            parts.append(f"## 파일: {file_info['name']}\n\n")

            df = pd.read_csv(file_info['path'])

            # Basic file stats
            parts.append(f"- 행 수: {len(df)}\n")
            parts.append(f"- 열 수: {len(df.columns)}\n")
            parts.append(f"- 열 목록: {', '.join(df.columns.tolist())}\n\n")

            # Sample data
            parts.append("### 데이터 샘플:\n\n")
            parts.append(df.head(5).to_markdown() + "\n\n")

            # Statistical summary of numeric columns
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                parts.append("### 기본 통계:\n\n")
                parts.append(df[numeric_cols].describe().to_markdown() + "\n\n")

            # Only flags that a time-like column exists; no series is computed.
            if any('year' in col.lower() or 'date' in col.lower() for col in df.columns):
                parts.append("### 시계열 패턴:\n\n")
                parts.append("데이터셋에 시간 관련 열이 있어 시계열 분석이 가능합니다.\n\n")
        except Exception as e:
            # Keep whatever was already written for this file and move on.
            logging.error(f"Error analyzing file {file_info['name']}: {e}")
            parts.append(f"이 파일 분석 중 오류가 발생했습니다: {str(e)}\n\n")

    # Fixed closing section that frames the report for the language model.
    parts.append("## 농산물 가격 예측 및 수요 분석에 대한 인사이트\n\n")
    parts.append("데이터셋에서 추출한 정보를 바탕으로 다음 인사이트를 제공합니다:\n\n")
    parts.append("1. 데이터 기반 분석 (기본적인 요약)\n")
    parts.append("2. 주요 가격 및 수요 동향\n")
    parts.append("3. 생산량 및 무역 패턴\n\n")
    parts.append("이 분석은 UN 글로벌 식량 및 농업 통계 데이터셋을 기반으로 합니다.\n\n")

    return "".join(parts)
 # ──────────────────────────────── System Prompt ─────────────────────────
def get_system_prompt(mode: str = "price_forecast", style: str = "professional",
                      include_search_results: bool = True,
                      include_uploaded_files: bool = False) -> str:
    """
    Generate a system prompt for the 'Agricultural Price & Demand Forecast AI Assistant'.

    Args:
        mode: Analysis mode key; unknown keys add no mode-specific section.
        style: Response style key; unknown keys add no style line.
        include_search_results: Append the web-search usage guidelines.
        include_uploaded_files: Append the uploaded-file usage guidelines.

    Returns:
        The assembled prompt: base prompt, optional mode section, optional
        style line, the dataset guidelines (always), the optional search and
        upload guidelines, and the response-format requirements.
    """
    base_prompt = """
당신은 농업 데이터 전문가로서 농산물 가격 예측과 수요 분석을 수행하는 AI 어시스턴트입니다.
주요 임무:
1. UN 글로벌 식량 및 농업 통계 데이터셋을 기반으로 농산물 시장 분석
2. 농산물 가격 추세 예측 및 수요 패턴 분석
3. 데이터를 바탕으로 명확하고 근거 있는 분석 제공
4. 관련 정보와 인사이트를 체계적으로 구성하여 제시
5. 시각적 이해를 돕기 위해 차트, 그래프 등을 적절히 활용
중요 가이드라인:
- 데이터에 기반한 객관적 분석을 제공하세요
- 분석 과정과 방법론을 명확히 설명하세요
- 통계적 신뢰성과 한계점을 투명하게 제시하세요
- 이해하기 쉬운 시각적 요소로 분석 결과를 보완하세요
- 마크다운을 활용해 응답을 체계적으로 구성하세요
"""
    mode_prompts = {
        "price_forecast": """
농산물 가격 예측 및 시장 분석에 집중합니다:
- 과거 가격 데이터 패턴에 기반한 예측 제공
- 가격 변동성 요인 분석(계절성, 날씨, 정책 등)
- 단기 및 중장기 가격 전망 제시
- 가격에 영향을 미치는 국내외 요인 식별
- 시장 불확실성과 리스크 요소 강조
""",
        "market_trend": """
시장 동향 및 수요 패턴 분석에 집중합니다:
- 주요 농산물 수요 변화 패턴 식별
- 소비자 선호도 및 구매 행동 분석
- 시장 세그먼트 및 틈새시장 기회 탐색
- 시장 확대/축소 트렌드 평가
- 수요 탄력성 및 가격 민감도 분석
""",
        "production_analysis": """
생산량 분석 및 식량 안보 전망에 집중합니다:
- 작물 생산량 추세 및 변동 요인 분석
- 식량 생산과 인구 성장 간의 관계 평가
- 국가/지역별 생산 역량 비교
- 식량 안보 위협 요소 및 취약점 식별
- 생산성 향상 전략 및 기회 제안
""",
        "agricultural_policy": """
농업 정책 및 규제 영향 분석에 집중합니다:
- 정부 정책과, 보조금, 규제의 시장 영향 분석
- 국제 무역 정책과 관세의 농산물 가격 영향 평가
- 농업 지원 프로그램의 효과성 검토
- 규제 환경 변화에 따른 시장 조정 예측
- 정책적 개입의 의도된/의도치 않은 결과 분석
""",
        "climate_impact": """
기후 변화가 농업에 미치는 영향 분석에 집중합니다:
- 기후 변화와 농산물 생산량/품질 간의 상관관계 분석
- 기상 이변이 가격 변동성에 미치는 영향 평가
- 장기적 기후 추세에 따른 농업 패턴 변화 예측
- 기후 회복력 있는 농업 시스템 전략 제안
- 지역별 기후 위험 노출도 및 취약성 매핑
"""
    }
    style_guides = {
        "professional": "전문적이고 학술적인 어조를 사용하세요. 기술적 용어를 적절히 사용하고 체계적인 데이터 분석을 제공하세요.",
        "simple": "쉽고 간결한 언어로 설명하세요. 전문 용어는 최소화하고 핵심 개념을 일상적인 표현으로 전달하세요.",
        "detailed": "상세하고 포괄적인 분석을 제공하세요. 다양한 데이터 포인트, 통계적 뉘앙스, 그리고 여러 시나리오를 고려한 심층 분석을 제시하세요.",
        "action_oriented": "실행 가능한 인사이트와 구체적인 권장사항에 초점을 맞추세요. '다음 단계' 및 '실질적 조언' 섹션을 포함하세요."
    }
    dataset_guide = """
UN 글로벌 식량 및 농업 통계 데이터셋 활용 지침:
- 제공된 데이터셋 분석 결과를 응답의 주요 근거로 사용하세요
- 데이터의 출처와 연도를 명확히 인용하세요
- 데이터셋 내 주요 변수 간의 관계를 분석하여 인사이트를 도출하세요
- 데이터의 한계와 불확실성을 투명하게 언급하세요
- 필요시 데이터 격차를 식별하고 추가 연구가 필요한 영역을 제안하세요
"""
    search_guide = """
웹 검색 결과 활용 지침:
- 데이터셋 분석을 보완하는 최신 시장 정보로 검색 결과를 활용하세요
- 각 정보의 출처를 마크다운 링크로 포함하세요: [출처명](URL)
- 주요 주장이나 데이터 포인트마다 출처를 표시하세요
- 출처가 상충할 경우, 다양한 관점과 신뢰도를 설명하세요
- 관련 동영상 링크는 [비디오: 제목](video_url) 형식으로 포함하세요
- 검색 정보를 일관되고 체계적인 응답으로 통합하세요
- 모든 주요 출처를 나열한 "참고 자료" 섹션을 마지막에 포함하세요
"""
    upload_guide = """
업로드된 파일 활용 지침:
- 업로드된 파일을 응답의 주요 정보원으로 활용하세요
- 쿼리와 직접 관련된 파일 정보를 추출하고 강조하세요
- 관련 구절을 인용하고 특정 파일을 출처로 인용하세요
- CSV 파일의 수치 데이터는 요약 문장으로 변환하세요
- PDF 콘텐츠는 특정 섹션이나 페이지를 참조하세요
- 파일 정보를 웹 검색 결과와 원활하게 통합하세요
- 정보가 상충할 경우, 일반적인 웹 결과보다 파일 콘텐츠를 우선시하세요
"""
    # Base prompt
    final_prompt = base_prompt
    # Add mode-specific guidance
    if mode in mode_prompts:
        final_prompt += "\n" + mode_prompts[mode]
    # Style
    if style in style_guides:
        final_prompt += f"\n\n분석 스타일: {style_guides[style]}"
    # Always include dataset guide
    final_prompt += f"\n\n{dataset_guide}"
    if include_search_results:
        final_prompt += f"\n\n{search_guide}"
    # The upload guide is controlled by its own flag, independent of whether
    # web search is enabled.
    if include_uploaded_files:
        final_prompt += f"\n\n{upload_guide}"
    final_prompt += """
\n\n응답 형식 요구사항:
- 마크다운 제목(## 및 ###)을 사용하여 응답을 체계적으로 구성하세요
- 중요한 점은 굵은 텍스트(**텍스트**)로 강조하세요
- 3-5개의 후속 질문을 포함한 "관련 질문" 섹션을 마지막에 추가하세요
- 적절한 간격과 단락 구분으로 응답을 서식화하세요
- 모든 링크는 마크다운 형식으로 클릭 가능하게 만드세요: [텍스트](url)
- 가능한 경우 데이터를 시각적으로 표현(표, 그래프 등의 설명)하세요
"""
    return final_prompt
 # ──────────────────────────────── Brave Search API ────────────────────────
 @st.cache_data(ttl=3600)
+def brave_search(query: str, count: int = 10):
     if not BRAVE_KEY:
         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
     headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
+    params = {"q": query + " 농산물 가격 동향 농업 데이터", "count": str(count)}
     for attempt in range(3):
         try:
     return []
 @st.cache_data(ttl=3600)
+def brave_video_search(query: str, count: int = 3):
     if not BRAVE_KEY:
         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
     headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
+    params = {"q": query + " 농산물 가격 농업 시장", "count": str(count)}
     for attempt in range(3):
         try:
     return []
 @st.cache_data(ttl=3600)
+def brave_news_search(query: str, count: int = 3):
     if not BRAVE_KEY:
         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
     headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
+    params = {"q": query + " 농산물 가격 동향 농업", "count": str(count)}
     for attempt in range(3):
         try:
 def mock_results(query: str) -> str:
     ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    return (f"# 대체 검색 콘텐츠 (생성 시간: {ts})\n\n"
+            f"'{query}'에 대한 검색 API 요청이 실패했거나 결과가 없습니다. "
+            f"기존 지식을 기반으로 응답을 생성해��세요.\n\n"
+            f"다음 사항을 고려하세요:\n\n"
+            f"- {query}에 관한 기본 개념과 중요성\n"
+            f"- 일반적으로 알려진 관련 통계나 추세\n"
+            f"- 이 주제에 대한 전문가 의견\n"
+            f"- 독자가 가질 수 있는 질문\n\n"
+            f"참고: 이는 실시간 데이터가 아닌 대체 지침입니다.\n\n")
 def do_web_search(query: str) -> str:
     try:
+        arts = brave_search(query, 10)
         if not arts:
             logging.warning("No search results, using fallback content")
             return mock_results(query)
         videos = brave_video_search(query, 2)
         news   = brave_news_search(query, 3)
+        result = "# 웹 검색 결과\n다음 결과를 활용하여 데이터셋 분석을 보완하는 포괄적인 답변을 제공하세요.\n\n"
+        result += "## 웹 결과\n\n"
+        for a in arts[:5]:
+            result += f"### 결과 {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
+            result += f"**출처**: [{a['displayed_link']}]({a['link']})\n\n---\n"
+        if news:
+            result += "## 뉴스 결과\n\n"
+            for n in news:
+                result += f"### {n['title']}\n\n{n['description']}\n\n"
+                result += f"**출처**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
         if videos:
+            result += "## 비디오 결과\n\n"
             for vid in videos:
                 result += f"### {vid['title']}\n\n"
                 if vid.get('thumbnail_url'):
+                    result += f"![썸네일]({vid['thumbnail_url']})\n\n"
+                result += f"**시청**: [{vid['source']}]({vid['video_url']})\n\n"
         return result
         if len(text) > 10000:
             text = text[:9700] + "...(truncated)..."
+        result = f"## 텍스트 파일: {file.name}\n\n" + text
         return result
     except Exception as e:
         logging.error(f"Error processing text file: {str(e)}")
+        return f"텍스트 파일 처리 오류: {str(e)}"
 def process_csv_file(file):
     try:
         file.seek(0)
         df = pd.read_csv(io.BytesIO(content))
+        result = f"## CSV 파일: {file.name}\n\n"
+        result += f"- 행: {len(df)}\n"
+        result += f"- 열: {len(df.columns)}\n"
+        result += f"- 열 이름: {', '.join(df.columns.tolist())}\n\n"
+        result += "### 데이터 미리보기\n\n"
         preview_df = df.head(10)
         try:
             markdown_table = preview_df.to_markdown(index=False)
             if markdown_table:
                 result += markdown_table + "\n\n"
             else:
+                result += "CSV 데이터를 표시할 수 없습니다.\n\n"
         except Exception as e:
             logging.error(f"Markdown table conversion error: {e}")
+            result += "텍스트로 데이터 표시:\n\n" + str(preview_df) + "\n\n"
         num_cols = df.select_dtypes(include=['number']).columns
         if len(num_cols) > 0:
+            result += "### 기본 통계 정보\n\n"
             try:
                 stats_df = df[num_cols].describe().round(2)
                 stats_markdown = stats_df.to_markdown()
                 if stats_markdown:
                     result += stats_markdown + "\n\n"
                 else:
+                    result += "통계 정보를 표시할 수 없습니다.\n\n"
             except Exception as e:
                 logging.error(f"Statistical info conversion error: {e}")
+                result += "통계 정보를 생성할 수 없습니다.\n\n"
         return result
     except Exception as e:
         logging.error(f"CSV file processing error: {str(e)}")
+        return f"CSV 파일 처리 오류: {str(e)}"
 def process_pdf_file(file):
     try:
         pdf_file = io.BytesIO(file_bytes)
         reader = PyPDF2.PdfReader(pdf_file, strict=False)
+        result = f"## PDF 파일: {file.name}\n\n- 총 페이지: {len(reader.pages)}\n\n"
         max_pages = min(5, len(reader.pages))
         all_text = ""
             try:
                 page = reader.pages[i]
                 page_text = page.extract_text()
+                current_page_text = f"### 페이지 {i+1}\n\n"
                 if page_text and len(page_text.strip()) > 0:
                     if len(page_text) > 1500:
+                        current_page_text += page_text[:1500] + "...(축약됨)...\n\n"
                     else:
                         current_page_text += page_text + "\n\n"
                 else:
+                    current_page_text += "(텍스트를 추출할 수 없음)\n\n"
                 all_text += current_page_text
                 if len(all_text) > 8000:
+                    all_text += "...(나머지 페이지 축약됨)...\n\n"
                     break
             except Exception as page_err:
                 logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
+                all_text += f"### 페이지 {i+1}\n\n(내용 추출 오류: {str(page_err)})\n\n"
         if len(reader.pages) > max_pages:
+            all_text += f"\n참고: 처음 {max_pages} 페이지만 표시됩니다.\n\n"
+        result += "### PDF 내용\n\n" + all_text
         return result
     except Exception as e:
         logging.error(f"PDF file processing error: {str(e)}")
+        return f"## PDF 파일: {file.name}\n\n오류: {str(e)}\n\n처리할 수 없습니다."
 def process_uploaded_files(files):
     if not files:
         return None
+    result = "# 업로드된 파일 내용\n\n사용자가 제공한 파일의 내용입니다.\n\n"
     for file in files:
         try:
             ext = file.name.split('.')[-1].lower()
             elif ext == 'pdf':
                 result += process_pdf_file(file) + "\n\n---\n\n"
             else:
+                result += f"### 지원되지 않는 파일: {file.name}\n\n---\n\n"
         except Exception as e:
             logging.error(f"File processing error {file.name}: {e}")
+            result += f"### 파일 처리 오류: {file.name}\n\n오류: {e}\n\n---\n\n"
     return result
         response = client.chat.completions.create(
             model="gpt-4.1-mini",
             messages=[
+                {"role": "system", "content": "농업 및 농산물에 관한 이미지 프롬프트를 생성합니다. 한 줄의 영어로 된 프롬프트만 반환하세요, 다른 텍스트는 포함하지 마세요."},
+                {"role": "user", "content": f"주제: {topic}\n\n---\n{response_text}\n\n---"}
             ],
             temperature=1,
             max_tokens=80,
         return response.choices[0].message.content.strip()
     except Exception as e:
         logging.error(f"OpenAI image prompt generation error: {e}")
+        return f"A professional photograph of agricultural produce and farm fields, data visualization of crop prices and trends, high quality"
def md_to_html(md: str, title="농산물 수요 예측 분석 결과"):
    """Render Markdown text as a minimal standalone HTML document.

    Args:
        md: Markdown source placed, after conversion, inside ``<body>``.
        title: Plain text for the ``<title>`` element.

    Returns:
        The complete HTML document as a string.
    """
    from html import escape  # local import keeps this block self-contained

    # The title is plain text (callers derive it from the first line of a
    # response), so escape it to stop characters such as "<" or "&" from
    # breaking the document head.
    safe_title = escape(str(title))
    return f"<!DOCTYPE html><html><head><title>{safe_title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
 def keywords(text: str, top=5):
     return " ".join(cleaned.split()[:top])
 # ──────────────────────────────── Streamlit UI ────────────────────────────
+def agricultural_price_forecast_app():
+    st.title("농산물 수요 및 가격 예측 AI 어시스턴트")
+    st.markdown("UN 글로벌 식량 및 농업 통계 데이터셋 분석 기반의 농산물 시장 예측")
     if "ai_model" not in st.session_state:
         st.session_state.ai_model = "gpt-4.1-mini"
         st.session_state.generate_image = False
     if "web_search_enabled" not in st.session_state:
         st.session_state.web_search_enabled = True
+    if "analysis_mode" not in st.session_state:
+        st.session_state.analysis_mode = "price_forecast"
     if "response_style" not in st.session_state:
         st.session_state.response_style = "professional"
     sb = st.sidebar
+    sb.title("분석 설정")
+    # Kaggle dataset info display
+    if sb.checkbox("데이터셋 정보 표시", value=False):
+        st.info("UN 글로벌 식량 및 농업 통계 데이터셋을 불러오는 중...")
+        dataset_info = load_agriculture_dataset()
+        if dataset_info:
+            st.success(f"데이터셋 로드 완료: {len(dataset_info['files'])}개 파일")
+            with st.expander("데이터셋 미리보기", expanded=False):
+                for file_info in dataset_info['files'][:5]:
+                    st.write(f"**{file_info['name']}** ({file_info['size_mb']} MB)")
+        else:
+            st.error("데이터셋을 불러오는데 실패했습니다. Kaggle API 설정을 확인하세요.")
+    sb.subheader("분석 구성")
     sb.selectbox(
+        "분석 모드",
+        options=list(ANALYSIS_MODES.keys()),
+        format_func=lambda x: ANALYSIS_MODES[x],
+        key="analysis_mode"
     )
     sb.selectbox(
+        "응답 스타일",
         options=list(RESPONSE_STYLES.keys()),
         format_func=lambda x: RESPONSE_STYLES[x],
         key="response_style"
     )
     # Example queries
+    sb.subheader("예시 질문")
     c1, c2, c3 = sb.columns(3)
+    if c1.button("쌀 가격 전망", key="ex1"):
         process_example(EXAMPLE_QUERIES["example1"])
+    if c2.button("기후 영향", key="ex2"):
         process_example(EXAMPLE_QUERIES["example2"])
+    if c3.button("곡물 시장", key="ex3"):
         process_example(EXAMPLE_QUERIES["example3"])
+    sb.subheader("기타 설정")
+    sb.toggle("자동 저장", key="auto_save")
+    sb.toggle("이미지 자동 생성", key="generate_image")
+    web_search_enabled = sb.toggle("웹 검색 사용", value=st.session_state.web_search_enabled)
     st.session_state.web_search_enabled = web_search_enabled
     if web_search_enabled:
+        st.sidebar.info("✅ 웹 검색 결과가 응답에 통합됩니다.")
     # Download the latest response
     latest_response = next(
             first_line = latest_response.split('\n', 1)[0].strip()
             title = first_line[:40] + "..." if len(first_line) > 40 else first_line
+        sb.subheader("최신 응답 다운로드")
         d1, d2 = sb.columns(2)
+        d1.download_button("마크다운으로 다운로드", latest_response,
                            file_name=f"{title}.md", mime="text/markdown")
+        d2.download_button("HTML로 다운로드", md_to_html(latest_response, title),
                            file_name=f"{title}.html", mime="text/html")
     # JSON conversation record upload
+    up = sb.file_uploader("대화 기록 불러오기 (.json)", type=["json"], key="json_uploader")
     if up:
         try:
             st.session_state.messages = json.load(up)
+            sb.success("대화 기록을 성공적으로 불러왔습니다")
         except Exception as e:
+            sb.error(f"불러오기 실패: {e}")
     # JSON conversation record download
+    if sb.button("대화 기록을 JSON으로 다운로드"):
         sb.download_button(
+            "저장",
             data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
             file_name="conversation_history.json",
             mime="application/json"
         )
     # File Upload
+    st.subheader("파일 업로드")
     uploaded_files = st.file_uploader(
+        "참고 자료로 사용할 파일 업로드 (txt, csv, pdf)",
         type=["txt", "csv", "pdf"],
         accept_multiple_files=True,
         key="file_uploader"
     if uploaded_files:
         file_count = len(uploaded_files)
+        st.success(f"{file_count}개 파일이 업로드되었습니다. 질의에 대한 소스로 사용됩니다.")
+        with st.expander("업로드된 파일 미리보기", expanded=False):
             for idx, file in enumerate(uploaded_files):
+                st.write(f"**파일명:** {file.name}")
                 ext = file.name.split('.')[-1].lower()
                 if ext == 'txt':
                     preview = file.read(1000).decode('utf-8', errors='ignore')
                     file.seek(0)
                     st.text_area(
+                        f"{file.name} 미리보기",
                         preview + ("..." if len(preview) >= 1000 else ""),
                         height=150
                     )
                     try:
                         df = pd.read_csv(file)
                         file.seek(0)
+                        st.write("CSV 미리보기 (최대 5행)")
                         st.dataframe(df.head(5))
                     except Exception as e:
+                        st.error(f"CSV 미리보기 실패: {e}")
                 elif ext == 'pdf':
                     try:
                         file_bytes = file.read()
                         reader = PyPDF2.PdfReader(pdf_file, strict=False)
                         pc = len(reader.pages)
+                        st.write(f"PDF 파일: {pc}페이지")
                         if pc > 0:
                             try:
                                 page_text = reader.pages[0].extract_text()
+                                preview = page_text[:500] if page_text else "(텍스트 추출 불가)"
+                                st.text_area("첫 페이지 미리보기", preview + "...", height=150)
                             except:
+                                st.warning("첫 페이지 텍스트 추출 실패")
                     except Exception as e:
+                        st.error(f"PDF 미리보기 실패: {e}")
                 if idx < file_count - 1:
                     st.divider()
             # Videos
             if "videos" in m and m["videos"]:
+                st.subheader("관련 비디오")
                 for video in m["videos"]:
+                    video_title = video.get('title', '관련 비디오')
                     video_url   = video.get('url', '')
                     thumbnail   = video.get('thumbnail', '')
                             st.write("🎬")
                         with col2:
                             st.markdown(f"**[{video_title}]({video_url})**")
+                            st.write(f"출처: {video.get('source', '알 수 없음')}")
                     else:
                         st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                        st.write(f"출처: {video.get('source', '알 수 없음')}")
     # User input
+    query = st.chat_input("농산물 가격, 수요 또는 시장 동향 관련 질문을 입력하세요.")
     if query:
         process_input(query, uploaded_files)
         has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
         try:
+            status = st.status("질문에 답변 준비 중...")
+            status.update(label="클라이언트 초기화 중...")
             client = get_openai_client()
             video_results = []
             news_results = []
+            # 농업 데이터셋 분석 결과 가져오기
+            status.update(label="농업 데이터셋 분석 중...")
+            with st.spinner("데이터셋 분석 중..."):
+                dataset_analysis = analyze_dataset_for_query(query)
             if use_web_search:
+                status.update(label="웹 검색 수행 중...")
+                with st.spinner("웹 검색 중..."):
                     search_content = do_web_search(keywords(query, top=5))
                 try:
+                    status.update(label="비디오 검색 중...")
                     video_results = brave_video_search(query, 2)
                     news_results  = brave_news_search(query, 3)
                 except Exception as search_err:
+                    logging.error(f"미디어 검색 오류: {search_err}")
             file_content = None
             if has_uploaded_files:
+                status.update(label="업로드된 파일 처리 중...")
+                with st.spinner("파일 분석 중..."):
                     file_content = process_uploaded_files(uploaded_files)
             valid_videos = []
                 if url and url.startswith('http'):
                     valid_videos.append({
                         'url': url,
+                        'title': vid.get('title', '비디오'),
                         'thumbnail': vid.get('thumbnail_url', ''),
+                        'source': vid.get('source', '비디오 출처')
                     })
+            status.update(label="종합 분석 준비 중...")
             sys_prompt = get_system_prompt(
+                mode=st.session_state.analysis_mode,
                 style=st.session_state.response_style,
                 include_search_results=use_web_search,
                 include_uploaded_files=has_uploaded_files
             ]
             user_content = query
+            # 항상 데이터셋 분석 결과 포함
+            user_content += "\n\n" + dataset_analysis
             if search_content:
                 user_content += "\n\n" + search_content
             if file_content:
                 user_content += "\n\n" + file_content
             if valid_videos:
+                user_content += "\n\n# 관련 동영상\n"
                 for i, vid in enumerate(valid_videos):
                     user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
                 message_placeholder.markdown(full_response, unsafe_allow_html=True)
                 if valid_videos:
+                    st.subheader("관련 비디오")
                     for video in valid_videos:
+                        video_title = video.get('title', '관련 비디오')
                         video_url   = video.get('url', '')
                         st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                        st.write(f"출처: {video.get('source', '알 수 없음')}")
+                status.update(label="응답 완료!", state="complete")
                 st.session_state.messages.append({
                     "role": "assistant",
             except Exception as api_error:
                 error_message = str(api_error)
+                logging.error(f"API 오류: {error_message}")
+                status.update(label=f"오류: {error_message}", state="error")
+                raise Exception(f"응답 생성 오류: {error_message}")
             if st.session_state.generate_image and full_response:
+                with st.spinner("맞춤형 이미지 생성 중..."):
                     try:
                         ip = extract_image_prompt(full_response, query)
                         img, cap = generate_image(ip)
                         if img:
+                            st.subheader("AI 생성 이미지")
                             st.image(img, caption=cap, use_container_width=True)
                     except Exception as img_error:
+                        logging.error(f"이미지 생성 오류: {str(img_error)}")
+                        st.warning("맞춤형 이미지 생성에 실패했습니다.")
             if full_response:
+                st.subheader("이 응답 다운로드")
                 c1, c2 = st.columns(2)
                 c1.download_button(
+                    "마크다운",
                     data=full_response,
                     file_name=f"{query[:30]}.md",
                     mime="text/markdown"
                     with open(fn, "w", encoding="utf-8") as fp:
                         json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
                 except Exception as e:
+                    logging.error(f"자동 저장 실패: {e}")
         except Exception as e:
             error_message = str(e)
+            placeholder.error(f"오류 발생: {error_message}")
+            logging.error(f"입력 처리 오류: {error_message}")
+            ans = f"요청 처리 중 오류가 발생했습니다: {error_message}"
             st.session_state.messages.append({"role": "assistant", "content": ans})
 # ──────────────────────────────── main ────────────────────────────────────
 def main():
+    st.write("==== 애플리케이션 시작 시간:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
+    agricultural_price_forecast_app()
 if __name__ == "__main__":
     main()