#!/usr/bin/env python3
"""
NZ Legislation Loophole Analysis - Hugging Face Spaces App

Root-level app.py for Hugging Face Spaces deployment.
Adapted for Spaces memory constraints and session-based caching.
"""

import os
import sys
import warnings
from pathlib import Path

import streamlit as st

# Add current directory to Python path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

# Import core modules with error handling for Spaces
try:
    from streamlit_app.core.cache_manager import CacheManager, get_cache_manager
    from streamlit_app.core.text_processor import TextProcessor
    from streamlit_app.core.llm_analyzer import LLMAnalyzer
    from streamlit_app.core.dataset_builder import DatasetBuilder
    from streamlit_app.utils.config import ConfigManager
    from streamlit_app.utils.performance import PerformanceMonitor
    from streamlit_app.utils.ui_helpers import UIHelpers
except ImportError as e:
    st.error(f"❌ Import Error: {e}")
    st.error("Please ensure all required packages are installed.")
    st.stop()

# Configure page settings for Spaces
st.set_page_config(
    page_title="NZ Legislation Loophole Analyzer",
    page_icon="⚖️",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/spaces/your-space',
        'Report a bug': 'https://github.com/your-repo/issues',
        'About': '''
## NZ Legislation Loophole Analyzer

AI-powered analysis of New Zealand legislation to identify potential
loopholes, ambiguities, and unintended consequences.

**Version:** 1.0.0 (Spaces Edition)
**Platform:** Hugging Face Spaces
**Built with:** Streamlit & Llama.cpp
'''
    }
)

# Spaces-specific configuration; conservative values because Spaces
# containers share limited memory with other apps.
SPACES_CONFIG = {
    'max_memory_mb': 512,        # Conservative memory limit for Spaces
    'cache_enabled': True,
    'persistent_cache': False,   # Session-based only
    'model_path': 'qwen3.gguf',  # Default model
    'context_length': 4096,      # Smaller context for memory
    'max_tokens': 2048,          # Smaller responses
}


def initialize_spaces_session():
    """Initialize Streamlit session state with Spaces-optimized defaults.

    Idempotent: every key is created only if missing, so this is safe to
    call on every rerun.
    """
    if 'cache_manager' not in st.session_state:
        # Initialize with Spaces-optimized settings
        st.session_state.cache_manager = CacheManager(
            max_memory_mb=SPACES_CONFIG['max_memory_mb'],
            persistent=False,  # No persistent storage in Spaces
            ttl_hours=1,       # Shorter TTL for memory efficiency
        )

    if 'config_manager' not in st.session_state:
        st.session_state.config_manager = ConfigManager()
        # Override with Spaces-optimized defaults
        spaces_defaults = {
            'model': {
                'path': SPACES_CONFIG['model_path'],
                'context_length': SPACES_CONFIG['context_length'],
                'max_tokens': SPACES_CONFIG['max_tokens'],
                'temperature': 0.3,
                'top_p': 0.85,
            },
            'cache': {
                'enabled': SPACES_CONFIG['cache_enabled'],
                'max_size_mb': SPACES_CONFIG['max_memory_mb'],
                'persistent': False,
                'ttl_hours': 1,
            },
            'processing': {
                'chunk_size': 2048,   # Smaller chunks for memory
                'chunk_overlap': 128,
                'batch_size': 4,      # Smaller batch size
                'clean_text': True,
            },
        }
        # Update configuration with Spaces defaults
        st.session_state.config_manager.update_config(spaces_defaults)

    if 'performance_monitor' not in st.session_state:
        st.session_state.performance_monitor = PerformanceMonitor(max_history=100)

    if 'text_processor' not in st.session_state:
        st.session_state.text_processor = TextProcessor()

    if 'current_analysis' not in st.session_state:
        st.session_state.current_analysis = None

    if 'analysis_results' not in st.session_state:
        st.session_state.analysis_results = []

    if 'processing_status' not in st.session_state:
        st.session_state.processing_status = {
            'is_running': False,
            'progress': 0,
            'current_task': '',
            'total_chunks': 0,
            'processed_chunks': 0,
        }

    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False

    if 'llm_analyzer' not in st.session_state:
        st.session_state.llm_analyzer = None


def show_spaces_optimized_home_page():
    """Render the home/landing page with current configuration overview."""
    st.title("🔍 NZ Legislation Loophole Analyzer")
    st.markdown("### AI-Powered Legal Analysis (Spaces Edition)")

    # Spaces-specific warnings and info
    with st.expander("⚠️ Spaces Environment Notes", expanded=False):
        st.info("""
        **Running on Hugging Face Spaces**
        - Memory optimized for cloud deployment
        - Session-based caching (resets between visits)
        - Use smaller models for best performance
        - Analysis results persist during your session
        """)

    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown("""
        This AI-powered tool analyzes New Zealand legislation to identify:

        🔍 **Potential Loopholes** - Legal ambiguities that could be exploited

        📋 **Unintended Consequences** - Hidden implications in legislative language

        ⚖️ **Ambiguities** - Vague or unclear legal provisions

        🎯 **Circumvention Strategies** - Ways legislation might be bypassed

        **Key Features:**
        - **Smart Caching**: Avoid re-processing identical content during your session
        - **Memory Optimized**: Designed for Spaces memory constraints
        - **Real-time Progress**: Live processing status and performance metrics
        - **Export Options**: Download results in multiple formats
        """)

        st.markdown("### Quick Start")
        st.markdown("""
        1. **Upload** your NZ legislation files (JSON lines or raw text)
        2. **Configure** analysis parameters (use smaller models for Spaces)
        3. **Process** the legislation with AI-powered analysis
        4. **Review** results with interactive visualizations
        5. **Export** findings before your session ends
        """)

    with col2:
        st.markdown("### Current Configuration")
        config = st.session_state.config_manager.get_config()

        # Model settings
        st.subheader("🤖 Model Settings")
        st.info(f"**Model:** {config['model']['path']}")
        st.info(f"**Context Length:** {config['model']['context_length']}")
        st.info(f"**Max Tokens:** {config['model']['max_tokens']}")

        # Processing settings
        st.subheader("⚙️ Processing")
        st.info(f"**Chunk Size:** {config['processing']['chunk_size']}")
        st.info(f"**Overlap:** {config['processing']['chunk_overlap']}")
        st.info(f"**Batch Size:** {config['processing']['batch_size']}")

        # Cache settings
        st.subheader("🧠 Cache")
        cache_stats = st.session_state.cache_manager.get_stats()
        st.info(f"**Status:** {'Active' if cache_stats['enabled'] else 'Disabled'}")
        st.info(f"**Max Memory:** {SPACES_CONFIG['max_memory_mb']}MB")
        st.info(f"**Hit Rate:** {cache_stats['hit_rate']:.1f}%")

        # Memory warning: flag at 80% of the configured ceiling
        perf_stats = st.session_state.performance_monitor.get_stats()
        memory_usage = perf_stats['memory_usage_mb']
        if memory_usage > SPACES_CONFIG['max_memory_mb'] * 0.8:
            st.warning(f"⚠️ High Memory Usage: {memory_usage:.1f}MB")
        else:
            st.success(f"✅ Memory Usage: {memory_usage:.1f}MB")

    if st.button("🚀 Start Analysis", type="primary", use_container_width=True):
        st.switch_page("pages/1_upload.py")


def show_spaces_optimized_upload_page():
    """Render the upload/configuration page and wire up processing actions."""
    st.title("📤 Upload & Process Legislation")

    # Memory warning for Spaces
    with st.expander("💡 Spaces Optimization Tips", expanded=False):
        st.info("""
        **For Best Performance on Spaces:**
        - Use smaller models (0.8B-1.5B parameters)
        - Process files individually for large documents
        - Keep chunk sizes under 2048 characters
        - Monitor memory usage in the sidebar
        """)

    # File upload section
    st.subheader("📁 Upload Legislation Files")

    col1, col2 = st.columns([1, 1])

    with col1:
        uploaded_files = st.file_uploader(
            "Select NZ legislation files",
            accept_multiple_files=True,
            type=['json', 'txt', 'jsonl'],
            help="Upload JSON lines format (.jsonl), JSON arrays (.json), or raw text (.txt) files",
            key="spaces_file_uploader",
        )

        if uploaded_files:
            st.success(f"📄 {len(uploaded_files)} file(s) selected")

            # Show file details with size warnings
            for file in uploaded_files:
                with st.expander(f"📄 {file.name}"):
                    size_mb = file.size / (1024 * 1024)
                    if size_mb > 10:  # Warning for large files
                        st.warning(f"Large file: {size_mb:.1f} MB")
                    else:
                        st.info(f"Size: {size_mb:.1f} MB")
                    st.write(f"**Type:** {file.type}")

                    # Preview content for text-like uploads only
                    if file.type in ['text/plain', 'application/json']:
                        content = file.read().decode('utf-8')
                        preview_length = min(300, len(content))
                        st.text_area(
                            "Preview",
                            (content[:preview_length] + "...")
                            if len(content) > preview_length else content,
                            height=100,
                            disabled=True,
                        )
                        file.seek(0)  # Reset file pointer for later processing

    with col2:
        # Processing configuration optimized for Spaces
        st.subheader("⚙️ Processing Configuration")
        config = st.session_state.config_manager.get_config()

        # Model settings with Spaces warnings
        with st.expander("🤖 Model Configuration", expanded=True):
            st.info("💡 Use smaller models (0.8B-1.5B) for best Spaces performance")
            model_path = st.text_input(
                "Model Path",
                value=config['model']['path'],
                help="Path to your GGUF model file (use small models for Spaces)",
            )
            context_length = st.slider(
                "Context Length",
                min_value=1024,
                max_value=8192,  # Reduced max for Spaces
                value=min(config['model']['context_length'], 4096),
                step=512,
                help="Maximum context length for the model",
            )
            max_tokens = st.slider(
                "Max Response Tokens",
                min_value=256,
                max_value=4096,
                value=min(config['model']['max_tokens'], 2048),
                step=128,
                help="Maximum tokens in model response",
            )

        # Text processing settings
        with st.expander("📝 Text Processing", expanded=True):
            chunk_size = st.slider(
                "Chunk Size",
                min_value=512,
                max_value=4096,  # Reduced for Spaces memory
                value=min(config['processing']['chunk_size'], 2048),
                step=256,
                help="Size of text chunks for processing",
            )
            chunk_overlap = st.slider(
                "Chunk Overlap",
                min_value=32,
                max_value=512,
                value=config['processing']['chunk_overlap'],
                step=32,
                help="Overlap between chunks for context preservation",
            )

        # Analysis settings.  The Spaces defaults installed in
        # initialize_spaces_session() do not define an 'analysis' section,
        # so fall back safely instead of raising KeyError; also ensure the
        # stored depth is one of the reduced option set.
        with st.expander("🔍 Analysis Settings", expanded=True):
            depth_options = ["Basic", "Standard", "Detailed"]  # Removed comprehensive for memory
            saved_depth = config.get('analysis', {}).get('depth', 'Standard')
            analysis_depth = st.select_slider(
                "Analysis Depth",
                options=depth_options,
                value=saved_depth if saved_depth in depth_options else "Standard",
                help="Level of detail in legal analysis (use Standard for Spaces)",
            )
            include_recommendations = st.checkbox(
                "Include Recommendations",
                value=config.get('analysis', {}).get('include_recommendations', True),
                help="Generate specific recommendations for addressing identified issues",
            )

    # Process button and status
    col1, col2, col3 = st.columns([1, 1, 1])

    with col1:
        if st.button("🚀 Start Processing", type="primary", use_container_width=True):
            if not uploaded_files:
                st.error("Please upload at least one legislation file")
            else:
                start_spaces_processing(uploaded_files, {
                    'model': {
                        'path': model_path,
                        'context_length': context_length,
                        'max_tokens': max_tokens,
                    },
                    'processing': {
                        'chunk_size': chunk_size,
                        'chunk_overlap': chunk_overlap,
                    },
                    'analysis': {
                        'depth': analysis_depth,
                        'include_recommendations': include_recommendations,
                    },
                })

    with col2:
        if st.button("⏹️ Stop Processing", use_container_width=True):
            stop_processing()

    with col3:
        if st.button("📊 View Results", use_container_width=True):
            st.switch_page("pages/2_analysis.py")


def start_spaces_processing(files, config):
    """Start a processing run, guarding against low available memory.

    Args:
        files: uploaded file objects to analyze.
        config: partial configuration dict merged into the config manager.
    """
    # Check memory before starting; bail out and offer a cache clear
    # when we are at 90% of the Spaces ceiling.
    perf_stats = st.session_state.performance_monitor.get_stats()
    if perf_stats['memory_usage_mb'] > SPACES_CONFIG['max_memory_mb'] * 0.9:
        st.warning("⚠️ High memory usage detected. Consider clearing cache first.")
        if st.button("Clear Cache and Continue"):
            st.session_state.cache_manager.clear_cache()
            st.rerun()
        return

    st.session_state.processing_status = {
        'is_running': True,
        'progress': 0,
        'current_task': 'Initializing...',
        'total_chunks': 0,
        'processed_chunks': 0,
    }

    # Update configuration
    st.session_state.config_manager.update_config(config)

    # Add memory warning
    st.info("💡 Processing on Spaces - this may take longer than local execution")
    st.rerun()


def stop_processing():
    """Flag the current processing run as stopped by the user."""
    st.session_state.processing_status['is_running'] = False
    st.session_state.processing_status['current_task'] = 'Stopped by user'


def show_spaces_optimized_results_page():
    """Render analysis results with overview metrics and export buttons."""
    st.title("📊 Analysis Results")

    # Session warning for Spaces
    with st.expander("💾 Session-Based Storage", expanded=False):
        st.warning("""
        **Important:** Results are stored in your session only.
        - Download results before closing your browser
        - Cache resets between visits
        - Consider using smaller models for faster processing
        """)

    if not st.session_state.analysis_results:
        st.info("No analysis results available. Please upload and process legislation files first.")
        return

    # Results overview
    st.subheader("📈 Results Overview")

    col1, col2, col3, col4 = st.columns(4)

    total_results = len(st.session_state.analysis_results)
    total_loopholes = sum(
        len(result.get('loopholes', []))
        for result in st.session_state.analysis_results
    )
    # max(total_results, 1) guards against division by zero
    avg_confidence = sum(
        result.get('confidence', 0)
        for result in st.session_state.analysis_results
    ) / max(total_results, 1)

    with col1:
        st.metric("Total Analyses", total_results)
    with col2:
        st.metric("Loopholes Found", total_loopholes)
    with col3:
        st.metric("Avg Confidence", f"{avg_confidence:.2f}")
    with col4:
        cache_stats = st.session_state.cache_manager.get_stats()
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Results display
    st.subheader("📋 Detailed Results")

    for i, result in enumerate(st.session_state.analysis_results):
        with st.expander(
            f"📄 Analysis {i + 1}: {result.get('title', 'Unknown Title')}",
            expanded=(i == 0),
        ):
            col1, col2 = st.columns([2, 1])

            with col1:
                st.markdown("**Summary:**")
                st.write(result.get('summary', 'No summary available'))
                st.markdown("**Key Findings:**")
                for finding in result.get('loopholes', []):
                    st.markdown(f"- {finding}")

            with col2:
                st.metric("Confidence", f"{result.get('confidence', 0):.2f}")
                st.metric("Processing Time", f"{result.get('processing_time', 0):.2f}s")
                st.metric("Chunks Processed", result.get('chunks_processed', 0))

    # Export options with Spaces warning
    st.subheader("💾 Export Results")

    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("📄 Export as JSON", use_container_width=True):
            export_results('json')
    with col2:
        if st.button("📊 Export as CSV", use_container_width=True):
            export_results('csv')
    with col3:
        if st.button("📈 Export as Excel", use_container_width=True):
            export_results('excel')


def export_results(format_type):
    """Export analysis results in the given format ('json'/'csv'/'excel').

    TODO: Implement actual export; currently only reports the intent.
    """
    st.success(
        f"Results exported as {format_type.upper()} - "
        "download will be available in the next version"
    )


def show_spaces_optimized_settings_page():
    """Render the settings page (model, processing, cache, performance tabs)."""
    st.title("⚙️ Settings & Configuration")

    # Spaces-specific info
    with st.expander("🌐 Spaces Environment", expanded=False):
        st.info("""
        **Spaces-Specific Settings:**
        - Memory limit: 512MB cache (conservative)
        - Session-based storage only
        - No persistent data between visits
        - Optimized for cloud performance
        """)

    tabs = st.tabs(["🤖 Model Settings", "📝 Processing", "🧠 Cache", "📊 Performance"])

    with tabs[0]:
        st.subheader("🤖 Model Configuration")
        config = st.session_state.config_manager.get_config()

        st.info("💡 For Spaces: Use smaller models (0.8B-1.5B parameters) for best performance")

        model_path = st.text_input(
            "Model Path",
            value=config['model']['path'],
            help="Path to your GGUF model file (smaller models recommended)",
        )
        context_length = st.slider(
            "Context Length",
            min_value=1024,
            max_value=8192,
            value=config['model']['context_length'],
            step=512,
            help="Maximum context length (smaller = faster processing)",
        )
        max_tokens = st.slider(
            "Max Response Tokens",
            min_value=256,
            max_value=4096,
            value=config['model']['max_tokens'],
            step=128,
            help="Maximum tokens in response (smaller = faster)",
        )
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=config['model']['temperature'],
            step=0.1,
            help="Controls randomness (lower = more consistent)",
        )

    with tabs[1]:
        st.subheader("📝 Text Processing")
        chunk_size = st.slider(
            "Chunk Size",
            min_value=512,
            max_value=4096,
            value=config['processing']['chunk_size'],
            step=256,
            help="Text chunk size (smaller = more memory efficient)",
        )
        chunk_overlap = st.slider(
            "Chunk Overlap",
            min_value=32,
            max_value=512,
            value=config['processing']['chunk_overlap'],
            step=32,
            help="Overlap between chunks for context",
        )
        batch_size = st.slider(
            "Batch Size",
            min_value=1,
            max_value=8,  # Reduced for Spaces
            value=config['processing']['batch_size'],
            step=1,
            help="Number of chunks to process at once (lower = less memory)",
        )

    with tabs[2]:
        st.subheader("🧠 Cache Configuration")
        enable_cache = st.checkbox(
            "Enable Caching",
            value=config['cache']['enabled'],
            help="Use cache to avoid re-processing (recommended)",
        )
        st.info(f"💡 Max cache size: {SPACES_CONFIG['max_memory_mb']}MB (fixed for Spaces)")
        cache_ttl = st.slider(
            "Cache TTL (hours)",
            min_value=0.5,
            max_value=2.0,
            value=config['cache']['ttl_hours'],
            step=0.5,
            help="How long to keep cached results (shorter = less memory)",
        )

    with tabs[3]:
        st.subheader("📊 Performance Monitoring")
        perf_stats = st.session_state.performance_monitor.get_stats()
        cache_stats = st.session_state.cache_manager.get_stats()

        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
        with col2:
            st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
        with col3:
            st.metric("Active Threads", perf_stats.get('active_threads', 0))

        # Performance recommendations
        recommendations = st.session_state.performance_monitor.get_recommendations()
        if recommendations:
            st.subheader("💡 Recommendations")
            for rec in recommendations:
                if "High" in rec or "Low" in rec:
                    st.warning(rec)
                else:
                    st.info(rec)

    # Save settings
    col1, col2 = st.columns([1, 1])
    with col1:
        if st.button("💾 Save Settings", type="primary", use_container_width=True):
            new_config = {
                'model': {
                    'path': model_path,
                    'context_length': context_length,
                    'max_tokens': max_tokens,
                    'temperature': temperature,
                },
                'processing': {
                    'chunk_size': chunk_size,
                    'chunk_overlap': chunk_overlap,
                    'batch_size': batch_size,
                },
                'cache': {
                    'enabled': enable_cache,
                    'ttl_hours': cache_ttl,
                },
            }
            st.session_state.config_manager.update_config(new_config)
            st.success("Settings saved successfully!")

    with col2:
        if st.button("🔄 Reset to Defaults", use_container_width=True):
            st.session_state.config_manager.reset_to_defaults()
            st.success("Settings reset to defaults!")
            st.rerun()


def show_spaces_optimized_performance_page():
    """Render the performance dashboard with live metrics and cache tools."""
    st.title("📈 Performance Dashboard")

    # Spaces-specific info
    with st.expander("🌐 Spaces Performance Notes", expanded=False):
        st.info("""
        **Spaces Environment:**
        - Memory limit: ~2-8GB shared
        - Cache: Session-based only
        - Performance: Optimized for cloud
        - Monitoring: Real-time metrics
        """)

    # Real-time metrics
    st.subheader("📊 Real-time Metrics")

    col1, col2, col3, col4 = st.columns(4)
    perf_stats = st.session_state.performance_monitor.get_stats()

    with col1:
        st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
    with col2:
        st.metric("Memory %", f"{perf_stats.get('memory_percent', 0):.1f}%")
    with col3:
        st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")
    with col4:
        cache_stats = st.session_state.cache_manager.get_stats()
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Memory warning for Spaces
    memory_percent = perf_stats.get('memory_percent', 0)
    if memory_percent > 80:
        st.error("⚠️ High memory usage - consider clearing cache")
    elif memory_percent > 60:
        st.warning("⚠️ Moderate memory usage")
    else:
        st.success("✅ Memory usage within limits")

    # Cache performance
    st.subheader("🧠 Cache Performance")
    cache_stats = st.session_state.cache_manager.get_stats()

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Requests", cache_stats['hits'] + cache_stats['misses'])
    with col2:
        st.metric("Cache Hits", cache_stats['hits'])
    with col3:
        st.metric("Cache Misses", cache_stats['misses'])
    with col4:
        st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    # Performance recommendations
    st.subheader("💡 Performance Recommendations")
    recommendations = st.session_state.performance_monitor.get_recommendations()
    if recommendations:
        for rec in recommendations:
            if "High" in rec or "Low" in rec:
                st.error(rec)
            elif "Moderate" in rec or "Consider" in rec:
                st.warning(rec)
            else:
                st.info(rec)
    else:
        st.success("✅ Performance is optimal!")

    # Cache management
    st.subheader("🧹 Cache Management")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("🗑 Clear Cache", type="secondary", use_container_width=True):
            st.session_state.cache_manager.clear_cache()
            st.success("Cache cleared successfully!")
            st.rerun()
    with col2:
        if st.button("🔄 Reset Statistics", use_container_width=True):
            st.session_state.performance_monitor.reset_stats()
            st.success("Statistics reset!")
            st.rerun()


def main():
    """Main application entry point for Spaces: sidebar, routing, footer."""
    # Initialize session state
    initialize_spaces_session()

    # Create sidebar with navigation and status
    with st.sidebar:
        st.title("⚖️ NZ Legislation Analyzer")
        st.markdown("---")
        st.markdown("**Spaces Edition**")
        st.markdown("---")

        # Navigation
        pages = {
            "🏠 Home": "home",
            "📤 Upload & Process": "upload",
            "📊 Analysis Results": "results",
            "⚙️ Settings": "settings",
            "📈 Performance": "performance",
        }
        selected_page = st.selectbox(
            "Navigate to:",
            list(pages.keys()),
            key="nav_select",
        )

        st.markdown("---")

        # Cache status
        with st.expander("🧠 Cache Status", expanded=True):
            cache_stats = st.session_state.cache_manager.get_stats()
            st.metric("Cache Hits", cache_stats['hits'])
            st.metric("Cache Misses", cache_stats['misses'])
            st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
            st.metric("Cached Chunks", cache_stats['entries'])
            if st.button("Clear Cache", type="secondary"):
                st.session_state.cache_manager.clear_cache()
                st.rerun()

        # Performance metrics
        with st.expander("📊 Performance", expanded=True):
            perf_stats = st.session_state.performance_monitor.get_stats()
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
            st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")

        # Processing status
        if st.session_state.processing_status['is_running']:
            with st.expander("🔄 Processing Status", expanded=True):
                st.progress(st.session_state.processing_status['progress'])
                st.text(st.session_state.processing_status['current_task'])
                st.text(
                    f"Chunk {st.session_state.processing_status['processed_chunks']}/"
                    f"{st.session_state.processing_status['total_chunks']}"
                )

    # Main content area: dispatch to the selected page renderer
    page = pages[selected_page]
    if page == "home":
        show_spaces_optimized_home_page()
    elif page == "upload":
        show_spaces_optimized_upload_page()
    elif page == "results":
        show_spaces_optimized_results_page()
    elif page == "settings":
        show_spaces_optimized_settings_page()
    elif page == "performance":
        show_spaces_optimized_performance_page()

    # Footer with Spaces branding.
    # NOTE(review): the original footer string was truncated in the source;
    # this is a minimal reconstruction — confirm against the deployed app.
    st.markdown("---")
    st.markdown(
        """
        <div style="text-align: center; color: grey;">
        NZ Legislation Loophole Analyzer — Hugging Face Spaces Edition
        </div>
        """,
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()