#!/usr/bin/env python3
"""
NZ Legislation Loophole Analysis - Hugging Face Spaces App

Root-level app.py for Hugging Face Spaces deployment.
Adapted for Spaces memory constraints and session-based caching.
"""

import os
import sys
import warnings
from pathlib import Path

import streamlit as st

# Add current directory to Python path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

# Import core modules with error handling for Spaces
try:
    from streamlit_app.core.cache_manager import CacheManager, get_cache_manager
    from streamlit_app.core.text_processor import TextProcessor
    from streamlit_app.core.llm_analyzer import LLMAnalyzer
    from streamlit_app.core.dataset_builder import DatasetBuilder
    from streamlit_app.utils.config import ConfigManager
    from streamlit_app.utils.performance import PerformanceMonitor
    from streamlit_app.utils.ui_helpers import UIHelpers
except ImportError as e:
    st.error(f"❌ Import Error: {e}")
    st.error("Please ensure all required packages are installed.")
    st.stop()

# Configure page settings for Spaces
st.set_page_config(
    page_title="NZ Legislation Loophole Analyzer",
    page_icon="⚖️",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/spaces/your-space',
        'Report a bug': 'https://github.com/your-repo/issues',
        'About': '''
## NZ Legislation Loophole Analyzer

AI-powered analysis of New Zealand legislation to identify potential
loopholes, ambiguities, and unintended consequences.

**Version:** 1.0.0 (Spaces Edition)
**Platform:** Hugging Face Spaces
**Built with:** Streamlit & Llama.cpp
'''
    }
)

# Spaces-specific configuration; conservative values because Spaces
# containers share limited memory with other apps.
SPACES_CONFIG = {
    'max_memory_mb': 512,        # Conservative memory limit for Spaces
    'cache_enabled': True,
    'persistent_cache': False,   # Session-based only
    'model_path': 'qwen3.gguf',  # Default model
    'context_length': 4096,      # Smaller context for memory
    'max_tokens': 2048,          # Smaller responses
}


def initialize_spaces_session():
    """Initialize Streamlit session state with Spaces-optimized defaults.

    Idempotent: every key is created only if missing, so this is safe to
    call on every rerun.
    """
    if 'cache_manager' not in st.session_state:
        # Initialize with Spaces-optimized settings
        st.session_state.cache_manager = CacheManager(
            max_memory_mb=SPACES_CONFIG['max_memory_mb'],
            persistent=False,  # No persistent storage in Spaces
            ttl_hours=1,       # Shorter TTL for memory efficiency
        )

    if 'config_manager' not in st.session_state:
        st.session_state.config_manager = ConfigManager()
        # Override with Spaces-optimized defaults
        spaces_defaults = {
            'model': {
                'path': SPACES_CONFIG['model_path'],
                'context_length': SPACES_CONFIG['context_length'],
                'max_tokens': SPACES_CONFIG['max_tokens'],
                'temperature': 0.3,
                'top_p': 0.85,
            },
            'cache': {
                'enabled': SPACES_CONFIG['cache_enabled'],
                'max_size_mb': SPACES_CONFIG['max_memory_mb'],
                'persistent': False,
                'ttl_hours': 1,
            },
            'processing': {
                'chunk_size': 2048,   # Smaller chunks for memory
                'chunk_overlap': 128,
                'batch_size': 4,      # Smaller batch size
                'clean_text': True,
            },
        }
        # Update configuration with Spaces defaults
        st.session_state.config_manager.update_config(spaces_defaults)

    if 'performance_monitor' not in st.session_state:
        st.session_state.performance_monitor = PerformanceMonitor(max_history=100)

    if 'text_processor' not in st.session_state:
        st.session_state.text_processor = TextProcessor()

    if 'current_analysis' not in st.session_state:
        st.session_state.current_analysis = None

    if 'analysis_results' not in st.session_state:
        st.session_state.analysis_results = []

    if 'processing_status' not in st.session_state:
        st.session_state.processing_status = {
            'is_running': False,
            'progress': 0,
            'current_task': '',
            'total_chunks': 0,
            'processed_chunks': 0,
        }

    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False

    if 'llm_analyzer' not in st.session_state:
        st.session_state.llm_analyzer = None


def show_spaces_optimized_home_page():
    """Render the home/landing page with current configuration overview."""
    st.title("🔍 NZ Legislation Loophole Analyzer")
    st.markdown("### AI-Powered Legal Analysis (Spaces Edition)")

    # Spaces-specific warnings and info
    with st.expander("⚠️ Spaces Environment Notes", expanded=False):
        st.info("""
        **Running on Hugging Face Spaces**
        - Memory optimized for cloud deployment
        - Session-based caching (resets between visits)
        - Use smaller models for best performance
        - Analysis results persist during your session
        """)

    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown("""
        This AI-powered tool analyzes New Zealand legislation to identify:

        🔍 **Potential Loopholes** - Legal ambiguities that could be exploited

        📋 **Unintended Consequences** - Hidden implications in legislative language

        ⚖️ **Ambiguities** - Vague or unclear legal provisions

        🎯 **Circumvention Strategies** - Ways legislation might be bypassed

        **Key Features:**
        - **Smart Caching**: Avoid re-processing identical content during your session
        - **Memory Optimized**: Designed for Spaces memory constraints
        - **Real-time Progress**: Live processing status and performance metrics
        - **Export Options**: Download results in multiple formats
        """)

        st.markdown("### Quick Start")
        st.markdown("""
        1. **Upload** your NZ legislation files (JSON lines or raw text)
        2. **Configure** analysis parameters (use smaller models for Spaces)
        3. **Process** the legislation with AI-powered analysis
        4. **Review** results with interactive visualizations
        5. **Export** findings before your session ends
        """)

    with col2:
        st.markdown("### Current Configuration")
        config = st.session_state.config_manager.get_config()

        # Model settings
        st.subheader("🤖 Model Settings")
        st.info(f"**Model:** {config['model']['path']}")
        st.info(f"**Context Length:** {config['model']['context_length']}")
        st.info(f"**Max Tokens:** {config['model']['max_tokens']}")

        # Processing settings
        st.subheader("⚙️ Processing")
        st.info(f"**Chunk Size:** {config['processing']['chunk_size']}")
        st.info(f"**Overlap:** {config['processing']['chunk_overlap']}")
        st.info(f"**Batch Size:** {config['processing']['batch_size']}")

        # Cache settings
        st.subheader("🧠 Cache")
        cache_stats = st.session_state.cache_manager.get_stats()
        st.info(f"**Status:** {'Active' if cache_stats['enabled'] else 'Disabled'}")
        st.info(f"**Max Memory:** {SPACES_CONFIG['max_memory_mb']}MB")
        st.info(f"**Hit Rate:** {cache_stats['hit_rate']:.1f}%")

        # Memory warning: flag at 80% of the configured ceiling
        perf_stats = st.session_state.performance_monitor.get_stats()
        memory_usage = perf_stats['memory_usage_mb']
        if memory_usage > SPACES_CONFIG['max_memory_mb'] * 0.8:
            st.warning(f"⚠️ High Memory Usage: {memory_usage:.1f}MB")
        else:
            st.success(f"✅ Memory Usage: {memory_usage:.1f}MB")

    if st.button("🚀 Start Analysis", type="primary", use_container_width=True):
        st.switch_page("pages/1_upload.py")


def show_spaces_optimized_upload_page():
    """Render the upload/configuration page and wire up processing actions."""
    st.title("📤 Upload & Process Legislation")

    # Memory warning for Spaces
    with st.expander("💡 Spaces Optimization Tips", expanded=False):
        st.info("""
        **For Best Performance on Spaces:**
        - Use smaller models (0.8B-1.5B parameters)
        - Process files individually for large documents
        - Keep chunk sizes under 2048 characters
        - Monitor memory usage in the sidebar
        """)

    # File upload section
    st.subheader("📁 Upload Legislation Files")

    col1, col2 = st.columns([1, 1])

    with col1:
        uploaded_files = st.file_uploader(
            "Select NZ legislation files",
            accept_multiple_files=True,
            type=['json', 'txt', 'jsonl'],
            help="Upload JSON lines format (.jsonl), JSON arrays (.json), or raw text (.txt) files",
            key="spaces_file_uploader",
        )

        if uploaded_files:
            st.success(f"📄 {len(uploaded_files)} file(s) selected")

            # Show file details with size warnings
            for file in uploaded_files:
                with st.expander(f"📄 {file.name}"):
                    size_mb = file.size / (1024 * 1024)
                    if size_mb > 10:  # Warning for large files
                        st.warning(f"Large file: {size_mb:.1f} MB")
                    else:
                        st.info(f"Size: {size_mb:.1f} MB")
                    st.write(f"**Type:** {file.type}")

                    # Preview content for text-like uploads only
                    if file.type in ['text/plain', 'application/json']:
                        content = file.read().decode('utf-8')
                        preview_length = min(300, len(content))
                        st.text_area(
                            "Preview",
                            (content[:preview_length] + "...")
                            if len(content) > preview_length else content,
                            height=100,
                            disabled=True,
                        )
                        file.seek(0)  # Reset file pointer for later processing

    with col2:
        # Processing configuration optimized for Spaces
        st.subheader("⚙️ Processing Configuration")
        config = st.session_state.config_manager.get_config()

        # Model settings with Spaces warnings
        with st.expander("🤖 Model Configuration", expanded=True):
            st.info("💡 Use smaller models (0.8B-1.5B) for best Spaces performance")
            model_path = st.text_input(
                "Model Path",
                value=config['model']['path'],
                help="Path to your GGUF model file (use small models for Spaces)",
            )
            context_length = st.slider(
                "Context Length",
                min_value=1024,
                max_value=8192,  # Reduced max for Spaces
                value=min(config['model']['context_length'], 4096),
                step=512,
                help="Maximum context length for the model",
            )
            max_tokens = st.slider(
                "Max Response Tokens",
                min_value=256,
                max_value=4096,
                value=min(config['model']['max_tokens'], 2048),
                step=128,
                help="Maximum tokens in model response",
            )

        # Text processing settings
        with st.expander("📝 Text Processing", expanded=True):
            chunk_size = st.slider(
                "Chunk Size",
                min_value=512,
                max_value=4096,  # Reduced for Spaces memory
                value=min(config['processing']['chunk_size'], 2048),
                step=256,
                help="Size of text chunks for processing",
            )
            chunk_overlap = st.slider(
                "Chunk Overlap",
                min_value=32,
                max_value=512,
                value=config['processing']['chunk_overlap'],
                step=32,
                help="Overlap between chunks for context preservation",
            )

        # Analysis settings.  The Spaces defaults installed in
        # initialize_spaces_session() do not define an 'analysis' section,
        # so fall back safely instead of raising KeyError; also ensure the
        # stored depth is one of the reduced option set.
        with st.expander("🔍 Analysis Settings", expanded=True):
            depth_options = ["Basic", "Standard", "Detailed"]  # Removed comprehensive for memory
            saved_depth = config.get('analysis', {}).get('depth', 'Standard')
            analysis_depth = st.select_slider(
                "Analysis Depth",
                options=depth_options,
                value=saved_depth if saved_depth in depth_options else "Standard",
                help="Level of detail in legal analysis (use Standard for Spaces)",
            )
            include_recommendations = st.checkbox(
                "Include Recommendations",
                value=config.get('analysis', {}).get('include_recommendations', True),
                help="Generate specific recommendations for addressing identified issues",
            )

    # Process button and status
    col1, col2, col3 = st.columns([1, 1, 1])

    with col1:
        if st.button("🚀 Start Processing", type="primary", use_container_width=True):
            if not uploaded_files:
                st.error("Please upload at least one legislation file")
            else:
                start_spaces_processing(uploaded_files, {
                    'model': {
                        'path': model_path,
                        'context_length': context_length,
                        'max_tokens': max_tokens,
                    },
                    'processing': {
                        'chunk_size': chunk_size,
                        'chunk_overlap': chunk_overlap,
                    },
                    'analysis': {
                        'depth': analysis_depth,
                        'include_recommendations': include_recommendations,
                    },
                })

    with col2:
        if st.button("⏹️ Stop Processing", use_container_width=True):
            stop_processing()

    with col3:
        if st.button("📊 View Results", use_container_width=True):
            st.switch_page("pages/2_analysis.py")


def start_spaces_processing(files, config):
    """Start a processing run, guarding against low available memory.

    Args:
        files: uploaded file objects to analyze.
        config: partial configuration dict merged into the config manager.
    """
    # Check memory before starting; bail out and offer a cache clear
    # when we are at 90% of the Spaces ceiling.
    perf_stats = st.session_state.performance_monitor.get_stats()
    if perf_stats['memory_usage_mb'] > SPACES_CONFIG['max_memory_mb'] * 0.9:
        st.warning("⚠️ High memory usage detected. Consider clearing cache first.")
        if st.button("Clear Cache and Continue"):
            st.session_state.cache_manager.clear_cache()
            st.rerun()
        return

    st.session_state.processing_status = {
        'is_running': True,
        'progress': 0,
        'current_task': 'Initializing...',
        'total_chunks': 0,
        'processed_chunks': 0,
    }

    # Update configuration
    st.session_state.config_manager.update_config(config)

    # Add memory warning
    st.info("💡 Processing on Spaces - this may take longer than local execution")
    st.rerun()


def stop_processing():
    """Flag the current processing run as stopped by the user."""
    st.session_state.processing_status['is_running'] = False
    st.session_state.processing_status['current_task'] = 'Stopped by user'


def show_spaces_optimized_results_page():
    """Render analysis results with overview metrics and export buttons."""
    st.title("📊 Analysis Results")

    # Session warning for Spaces
    with st.expander("💾 Session-Based Storage", expanded=False):
        st.warning("""
        **Important:** Results are stored in your session only.
        - Download results before closing your browser
        - Cache resets between visits
        - Consider using smaller models for faster processing
        """)

    if not st.session_state.analysis_results:
        st.info("No analysis results available. Please upload and process legislation files first.")
        return

    # Results overview
    st.subheader("📈 Results Overview")

    col1, col2, col3, col4 = st.columns(4)

    total_results = len(st.session_state.analysis_results)
    total_loopholes = sum(
        len(result.get('loopholes', []))
        for result in st.session_state.analysis_results
    )
    # max(total_results, 1) guards against division by zero
    avg_confidence = sum(
        result.get('confidence', 0)
        for result in st.session_state.analysis_results
    ) / max(total_results, 1)

    with col1:
        st.metric("Total Analyses", total_results)
    with col2:
        st.metric("Loopholes Found", total_loopholes)
    with col3:
        st.metric("Avg Confidence", f"{avg_confidence:.2f}")
    with col4:
        cache_stats = st.session_state.cache_manager.get_stats()
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Results display
    st.subheader("📋 Detailed Results")

    for i, result in enumerate(st.session_state.analysis_results):
        with st.expander(
            f"📄 Analysis {i + 1}: {result.get('title', 'Unknown Title')}",
            expanded=(i == 0),
        ):
            col1, col2 = st.columns([2, 1])

            with col1:
                st.markdown("**Summary:**")
                st.write(result.get('summary', 'No summary available'))
                st.markdown("**Key Findings:**")
                for finding in result.get('loopholes', []):
                    st.markdown(f"- {finding}")

            with col2:
                st.metric("Confidence", f"{result.get('confidence', 0):.2f}")
                st.metric("Processing Time", f"{result.get('processing_time', 0):.2f}s")
                st.metric("Chunks Processed", result.get('chunks_processed', 0))

    # Export options with Spaces warning
    st.subheader("💾 Export Results")

    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("📄 Export as JSON", use_container_width=True):
            export_results('json')
    with col2:
        if st.button("📊 Export as CSV", use_container_width=True):
            export_results('csv')
    with col3:
        if st.button("📈 Export as Excel", use_container_width=True):
            export_results('excel')


def export_results(format_type):
    """Export analysis results in the given format ('json'/'csv'/'excel').

    TODO: Implement actual export; currently only reports the intent.
    """
    st.success(
        f"Results exported as {format_type.upper()} - "
        "download will be available in the next version"
    )


def show_spaces_optimized_settings_page():
    """Render the settings page (model, processing, cache, performance tabs)."""
    st.title("⚙️ Settings & Configuration")

    # Spaces-specific info
    with st.expander("🌐 Spaces Environment", expanded=False):
        st.info("""
        **Spaces-Specific Settings:**
        - Memory limit: 512MB cache (conservative)
        - Session-based storage only
        - No persistent data between visits
        - Optimized for cloud performance
        """)

    tabs = st.tabs(["🤖 Model Settings", "📝 Processing", "🧠 Cache", "📊 Performance"])

    with tabs[0]:
        st.subheader("🤖 Model Configuration")
        config = st.session_state.config_manager.get_config()

        st.info("💡 For Spaces: Use smaller models (0.8B-1.5B parameters) for best performance")

        model_path = st.text_input(
            "Model Path",
            value=config['model']['path'],
            help="Path to your GGUF model file (smaller models recommended)",
        )
        context_length = st.slider(
            "Context Length",
            min_value=1024,
            max_value=8192,
            value=config['model']['context_length'],
            step=512,
            help="Maximum context length (smaller = faster processing)",
        )
        max_tokens = st.slider(
            "Max Response Tokens",
            min_value=256,
            max_value=4096,
            value=config['model']['max_tokens'],
            step=128,
            help="Maximum tokens in response (smaller = faster)",
        )
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=config['model']['temperature'],
            step=0.1,
            help="Controls randomness (lower = more consistent)",
        )

    with tabs[1]:
        st.subheader("📝 Text Processing")
        chunk_size = st.slider(
            "Chunk Size",
            min_value=512,
            max_value=4096,
            value=config['processing']['chunk_size'],
            step=256,
            help="Text chunk size (smaller = more memory efficient)",
        )
        chunk_overlap = st.slider(
            "Chunk Overlap",
            min_value=32,
            max_value=512,
            value=config['processing']['chunk_overlap'],
            step=32,
            help="Overlap between chunks for context",
        )
        batch_size = st.slider(
            "Batch Size",
            min_value=1,
            max_value=8,  # Reduced for Spaces
            value=config['processing']['batch_size'],
            step=1,
            help="Number of chunks to process at once (lower = less memory)",
        )

    with tabs[2]:
        st.subheader("🧠 Cache Configuration")
        enable_cache = st.checkbox(
            "Enable Caching",
            value=config['cache']['enabled'],
            help="Use cache to avoid re-processing (recommended)",
        )
        st.info(f"💡 Max cache size: {SPACES_CONFIG['max_memory_mb']}MB (fixed for Spaces)")
        cache_ttl = st.slider(
            "Cache TTL (hours)",
            min_value=0.5,
            max_value=2.0,
            value=config['cache']['ttl_hours'],
            step=0.5,
            help="How long to keep cached results (shorter = less memory)",
        )

    with tabs[3]:
        st.subheader("📊 Performance Monitoring")
        perf_stats = st.session_state.performance_monitor.get_stats()
        cache_stats = st.session_state.cache_manager.get_stats()

        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
        with col2:
            st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
        with col3:
            st.metric("Active Threads", perf_stats.get('active_threads', 0))

        # Performance recommendations
        recommendations = st.session_state.performance_monitor.get_recommendations()
        if recommendations:
            st.subheader("💡 Recommendations")
            for rec in recommendations:
                if "High" in rec or "Low" in rec:
                    st.warning(rec)
                else:
                    st.info(rec)

    # Save settings
    col1, col2 = st.columns([1, 1])
    with col1:
        if st.button("💾 Save Settings", type="primary", use_container_width=True):
            new_config = {
                'model': {
                    'path': model_path,
                    'context_length': context_length,
                    'max_tokens': max_tokens,
                    'temperature': temperature,
                },
                'processing': {
                    'chunk_size': chunk_size,
                    'chunk_overlap': chunk_overlap,
                    'batch_size': batch_size,
                },
                'cache': {
                    'enabled': enable_cache,
                    'ttl_hours': cache_ttl,
                },
            }
            st.session_state.config_manager.update_config(new_config)
            st.success("Settings saved successfully!")

    with col2:
        if st.button("🔄 Reset to Defaults", use_container_width=True):
            st.session_state.config_manager.reset_to_defaults()
            st.success("Settings reset to defaults!")
            st.rerun()


def show_spaces_optimized_performance_page():
    """Render the performance dashboard with live metrics and cache tools."""
    st.title("📈 Performance Dashboard")

    # Spaces-specific info
    with st.expander("🌐 Spaces Performance Notes", expanded=False):
        st.info("""
        **Spaces Environment:**
        - Memory limit: ~2-8GB shared
        - Cache: Session-based only
        - Performance: Optimized for cloud
        - Monitoring: Real-time metrics
        """)

    # Real-time metrics
    st.subheader("📊 Real-time Metrics")

    col1, col2, col3, col4 = st.columns(4)
    perf_stats = st.session_state.performance_monitor.get_stats()

    with col1:
        st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
    with col2:
        st.metric("Memory %", f"{perf_stats.get('memory_percent', 0):.1f}%")
    with col3:
        st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")
    with col4:
        cache_stats = st.session_state.cache_manager.get_stats()
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Memory warning for Spaces
    memory_percent = perf_stats.get('memory_percent', 0)
    if memory_percent > 80:
        st.error("⚠️ High memory usage - consider clearing cache")
    elif memory_percent > 60:
        st.warning("⚠️ Moderate memory usage")
    else:
        st.success("✅ Memory usage within limits")

    # Cache performance
    st.subheader("🧠 Cache Performance")
    cache_stats = st.session_state.cache_manager.get_stats()

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Requests", cache_stats['hits'] + cache_stats['misses'])
    with col2:
        st.metric("Cache Hits", cache_stats['hits'])
    with col3:
        st.metric("Cache Misses", cache_stats['misses'])
    with col4:
        st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Performance recommendations
    st.subheader("💡 Performance Recommendations")
    recommendations = st.session_state.performance_monitor.get_recommendations()
    if recommendations:
        for rec in recommendations:
            if "High" in rec or "Low" in rec:
                st.error(rec)
            elif "Moderate" in rec or "Consider" in rec:
                st.warning(rec)
            else:
                st.info(rec)
    else:
        st.success("✅ Performance is optimal!")

    # Cache management
    st.subheader("🧹 Cache Management")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("🗑 Clear Cache", type="secondary", use_container_width=True):
            st.session_state.cache_manager.clear_cache()
            st.success("Cache cleared successfully!")
            st.rerun()
    with col2:
        if st.button("🔄 Reset Statistics", use_container_width=True):
            st.session_state.performance_monitor.reset_stats()
            st.success("Statistics reset!")
            st.rerun()


def main():
    """Main application entry point for Spaces: sidebar, routing, footer."""
    # Initialize session state
    initialize_spaces_session()

    # Create sidebar with navigation and status
    with st.sidebar:
        st.title("⚖️ NZ Legislation Analyzer")
        st.markdown("---")
        st.markdown("**Spaces Edition**")
        st.markdown("---")

        # Navigation
        pages = {
            "🏠 Home": "home",
            "📤 Upload & Process": "upload",
            "📊 Analysis Results": "results",
            "⚙️ Settings": "settings",
            "📈 Performance": "performance",
        }
        selected_page = st.selectbox(
            "Navigate to:",
            list(pages.keys()),
            key="nav_select",
        )

        st.markdown("---")

        # Cache status
        with st.expander("🧠 Cache Status", expanded=True):
            cache_stats = st.session_state.cache_manager.get_stats()
            st.metric("Cache Hits", cache_stats['hits'])
            st.metric("Cache Misses", cache_stats['misses'])
            st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
            st.metric("Cached Chunks", cache_stats['entries'])
            if st.button("Clear Cache", type="secondary"):
                st.session_state.cache_manager.clear_cache()
                st.rerun()

        # Performance metrics
        with st.expander("📊 Performance", expanded=True):
            perf_stats = st.session_state.performance_monitor.get_stats()
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
            st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")

        # Processing status
        if st.session_state.processing_status['is_running']:
            with st.expander("🔄 Processing Status", expanded=True):
                st.progress(st.session_state.processing_status['progress'])
                st.text(st.session_state.processing_status['current_task'])
                st.text(
                    f"Chunk {st.session_state.processing_status['processed_chunks']}/"
                    f"{st.session_state.processing_status['total_chunks']}"
                )

    # Main content area: dispatch to the selected page renderer
    page = pages[selected_page]
    if page == "home":
        show_spaces_optimized_home_page()
    elif page == "upload":
        show_spaces_optimized_upload_page()
    elif page == "results":
        show_spaces_optimized_results_page()
    elif page == "settings":
        show_spaces_optimized_settings_page()
    elif page == "performance":
        show_spaces_optimized_performance_page()

    # Footer with Spaces branding.
    # NOTE(review): the original footer string was truncated in the source;
    # this is a minimal reconstruction — confirm against the deployed app.
    st.markdown("---")
    st.markdown(
        """
        <div style="text-align: center; color: grey;">
        NZ Legislation Loophole Analyzer — Hugging Face Spaces Edition
        </div>
        """,
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()