|  | from src.table_creator.table_extractor import TableExtraction | 
					
						
						|  | import streamlit as st | 
					
						
						|  | import base64 | 
					
						
						|  | from PIL import Image | 
					
						
						|  | import os | 
					
						
						|  | import cv2 | 
					
						
						|  | import numpy as np | 
					
						
						|  | import tempfile | 
					
						
						|  | import traceback | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if 'tab_ext' not in st.session_state: | 
					
						
						|  | st.session_state.tab_ext = TableExtraction() | 
					
						
						|  | print('Models loaded.') | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def process_image(imgpath): | 
					
						
						|  | return st.session_state.tab_ext.detect(imgpath) | 
					
						
						|  |  | 
					
						
						|  | def draw_bounding_box(image, bbox): | 
					
						
						|  | """Draw a bounding box on the image""" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | img_array = np.array(image) | 
					
						
						|  | if len(img_array.shape) == 3: | 
					
						
						|  | img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) | 
					
						
						|  |  | 
					
						
						|  | x_min, y_min, x_max, y_max = bbox | 
					
						
						|  | cv2.rectangle(img_array, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2) | 
					
						
						|  |  | 
					
						
						|  | if len(img_array.shape) == 3: | 
					
						
						|  | img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB) | 
					
						
						|  |  | 
					
						
						|  | return Image.fromarray(img_array) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.set_page_config( | 
					
						
						|  | page_title="Table Extraction Tool", | 
					
						
						|  | layout="wide", | 
					
						
						|  | initial_sidebar_state="expanded" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <style> | 
					
						
						|  | /* Main container and background */ | 
					
						
						|  | .main { padding: 1.5rem; } | 
					
						
						|  | .stApp { | 
					
						
						|  | background: linear-gradient(135deg, #f6f9fc 0%, #f0f4f8 100%); | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Header styling */ | 
					
						
						|  | .main-header { | 
					
						
						|  | background: linear-gradient(90deg, #1a365d 0%, #2563eb 100%); | 
					
						
						|  | color: white; | 
					
						
						|  | padding: 2rem 3rem; | 
					
						
						|  | border-radius: 15px; | 
					
						
						|  | margin-bottom: 2rem; | 
					
						
						|  | box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .main-header h1 { | 
					
						
						|  | font-size: 2.5rem; | 
					
						
						|  | margin-bottom: 0.5rem; | 
					
						
						|  | font-weight: 600; | 
					
						
						|  | color: white; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .main-header p { | 
					
						
						|  | font-size: 1.1rem; | 
					
						
						|  | opacity: 0.9; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Card containers */ | 
					
						
						|  | .content-card { | 
					
						
						|  | background-color: white; | 
					
						
						|  | padding: 1.5rem; | 
					
						
						|  | border-radius: 12px; | 
					
						
						|  | box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); | 
					
						
						|  | border: 1px solid #e5e7eb; | 
					
						
						|  | margin-bottom: 1.5rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Upload section - Reduced size */ | 
					
						
						|  | .upload-section { | 
					
						
						|  | text-align: center; | 
					
						
						|  | padding: 1rem; | 
					
						
						|  | border: 2px dashed #e5e7eb; | 
					
						
						|  | border-radius: 12px; | 
					
						
						|  | background-color: #f8fafc; | 
					
						
						|  | max-width: 600px; | 
					
						
						|  | margin: 0 auto; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .upload-icon { | 
					
						
						|  | font-size: 1.5rem; | 
					
						
						|  | color: #2563eb; | 
					
						
						|  | margin-bottom: 0.5rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Results section */ | 
					
						
						|  | .results-header { | 
					
						
						|  | font-size: 1.25rem; | 
					
						
						|  | color: #1f2937; | 
					
						
						|  | margin-bottom: 1rem; | 
					
						
						|  | padding-bottom: 0.5rem; | 
					
						
						|  | border-bottom: 2px solid #e5e7eb; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Download buttons */ | 
					
						
						|  | .download-button { | 
					
						
						|  | display: inline-block; | 
					
						
						|  | padding: 0.75rem 1.5rem; | 
					
						
						|  | background-color: #2563eb; | 
					
						
						|  | color: white; | 
					
						
						|  | text-decoration: none; | 
					
						
						|  | border-radius: 8px; | 
					
						
						|  | transition: all 0.2s; | 
					
						
						|  | text-align: center; | 
					
						
						|  | width: 100%; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .download-button:hover { | 
					
						
						|  | background-color: #1d4ed8; | 
					
						
						|  | box-shadow: 0 4px 6px -1px rgba(37, 99, 235, 0.2); | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Tabs styling */ | 
					
						
						|  | .stTabs [data-baseweb="tab-list"] { | 
					
						
						|  | gap: 1rem; | 
					
						
						|  | background-color: #f8fafc; | 
					
						
						|  | padding: 0.5rem; | 
					
						
						|  | border-radius: 8px; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .stTabs [data-baseweb="tab"] { | 
					
						
						|  | color: #4b5563; | 
					
						
						|  | font-weight: 500; | 
					
						
						|  | padding: 0.5rem 1.5rem; | 
					
						
						|  | border-radius: 6px; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .stTabs [data-baseweb="tab"][aria-selected="true"] { | 
					
						
						|  | background-color: #2563eb; | 
					
						
						|  | color: white; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | /* Guide section styling */ | 
					
						
						|  | .guide-section { | 
					
						
						|  | background-color: white; | 
					
						
						|  | padding: 2rem; | 
					
						
						|  | border-radius: 12px; | 
					
						
						|  | margin-bottom: 1.5rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .guide-header { | 
					
						
						|  | color: #1a365d; | 
					
						
						|  | font-size: 1.5rem; | 
					
						
						|  | margin-bottom: 1rem; | 
					
						
						|  | border-bottom: 2px solid #e5e7eb; | 
					
						
						|  | padding-bottom: 0.5rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .guide-subheader { | 
					
						
						|  | color: #2563eb; | 
					
						
						|  | font-size: 1.2rem; | 
					
						
						|  | margin: 1.5rem 0 0.5rem 0; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .guide-text { | 
					
						
						|  | color: #4b5563; | 
					
						
						|  | line-height: 1.6; | 
					
						
						|  | margin-bottom: 1rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .feature-card { | 
					
						
						|  | background-color: #f8fafc; | 
					
						
						|  | padding: 1rem; | 
					
						
						|  | border-radius: 8px; | 
					
						
						|  | margin-bottom: 1rem; | 
					
						
						|  | border-left: 4px solid #2563eb; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .step-container { | 
					
						
						|  | display: flex; | 
					
						
						|  | align-items: flex-start; | 
					
						
						|  | margin-bottom: 1rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .step-number { | 
					
						
						|  | background-color: #2563eb; | 
					
						
						|  | color: white; | 
					
						
						|  | width: 24px; | 
					
						
						|  | height: 24px; | 
					
						
						|  | border-radius: 12px; | 
					
						
						|  | display: flex; | 
					
						
						|  | align-items: center; | 
					
						
						|  | justify-content: center; | 
					
						
						|  | margin-right: 1rem; | 
					
						
						|  | flex-shrink: 0; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .info-icon { | 
					
						
						|  | color: #2563eb; | 
					
						
						|  | margin-right: 0.5rem; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | .tech-details { | 
					
						
						|  | background-color: #f0f9ff; | 
					
						
						|  | padding: 1rem; | 
					
						
						|  | border-radius: 8px; | 
					
						
						|  | margin: 1rem 0; | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | </style> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with st.sidebar: | 
					
						
						|  |  | 
					
						
						|  | st.divider() | 
					
						
						|  | st.markdown('<h2 class="guide-header">π User Guide</h2>', unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown('<h3 class="guide-subheader">π― How It Works</h3>', unsafe_allow_html=True) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div class="guide-text"> | 
					
						
						|  | This tool uses advanced computer vision and machine learning techniques to: | 
					
						
						|  | <ul> | 
					
						
						|  | <li>Detect and locate tables in document images</li> | 
					
						
						|  | <li>Extract structured data from the detected tables</li> | 
					
						
						|  | <li>Convert the data into easily manageable formats</li> | 
					
						
						|  | </ul> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown('<h3 class="guide-subheader">π Usage Instructions</h3>', unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div class="step-container"> | 
					
						
						|  | <div class="step-number">1</div> | 
					
						
						|  | <div class="guide-text">Upload a document image containing a table (JPG, or JPEG format)</div> | 
					
						
						|  | </div> | 
					
						
						|  |  | 
					
						
						|  | <div class="step-container"> | 
					
						
						|  | <div class="step-number">2</div> | 
					
						
						|  | <div class="guide-text">The tool will automatically detect and highlight the table in your image</div> | 
					
						
						|  | </div> | 
					
						
						|  |  | 
					
						
						|  | <div class="step-container"> | 
					
						
						|  | <div class="step-number">3</div> | 
					
						
						|  | <div class="guide-text">View both raw and enhanced versions of the extracted data</div> | 
					
						
						|  | </div> | 
					
						
						|  |  | 
					
						
						|  | <div class="step-container"> | 
					
						
						|  | <div class="step-number">4</div> | 
					
						
						|  | <div class="guide-text">Download the results in CSV format for further use</div> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown('<h3 class="guide-subheader">π‘ Best Practices</h3>', unsafe_allow_html=True) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div class="feature-card"> | 
					
						
						|  | <strong>For Best Results:</strong> | 
					
						
						|  | <ul> | 
					
						
						|  | <li>Use clear, high-resolution images</li> | 
					
						
						|  | <li>Ensure tables have well-defined borders</li> | 
					
						
						|  | <li>Avoid skewed or rotated images</li> | 
					
						
						|  | <li>Make sure text is clearly readable</li> | 
					
						
						|  | </ul> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with st.expander("π§ Technical Details"): | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div class="tech-details"> | 
					
						
						|  | <p><strong>Algorithm Overview:</strong></p> | 
					
						
						|  | <ul> | 
					
						
						|  | <li>Uses computer vision for table boundary detection</li> | 
					
						
						|  | <li>Employs OCR (Optical Character Recognition) for text extraction</li> | 
					
						
						|  | <li>Implements intelligent cell segmentation</li> | 
					
						
						|  | <li>Applies post-processing for enhanced accuracy</li> | 
					
						
						|  | </ul> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown('<h3 class="guide-subheader">π Connect with Me</h3>', unsafe_allow_html=True) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div class="guide-text" style="font-size: 1rem;"> | 
					
						
						|  | If you encounter any issues or have questions, feel free to reach out: | 
					
						
						|  | <a href="https://github.com/Sudhanshu1304" target="_blank" style="text-decoration: none;"> | 
					
						
						|  | <img src="https://img.icons8.com/ios-filled/20/000000/github.png" alt="GitHub" style="vertical-align: middle; margin-right: 5px;"/> | 
					
						
						|  | GitHub | 
					
						
						|  | </a> | | 
					
						
						|  | <a href="https://www.linkedin.com/in/sudhanshu-pandey-847448193/" target="_blank" style="text-decoration: none;"> | 
					
						
						|  | <img src="https://img.icons8.com/ios-filled/20/000000/linkedin.png" alt="LinkedIn" style="vertical-align: middle; margin-right: 5px;"/> | 
					
						
						|  | LinkedIn | 
					
						
						|  | </a> | | 
					
						
						|  | <a href="https://medium.com/@sudhanshu.dpandey" target="_blank" style="text-decoration: none;"> | 
					
						
						|  | <img src="https://img.icons8.com/ios-filled/20/000000/medium-logo.png" alt="Medium" style="vertical-align: middle; margin-right: 5px;"/> | 
					
						
						|  | Medium | 
					
						
						|  | </a> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if 'is_expanded' not in st.session_state: | 
					
						
						|  | st.session_state.is_expanded = False | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div class="main-header"> | 
					
						
						|  | <h1>π Table Extraction Tool</h1> | 
					
						
						|  | <p>Upload an image containing tables and instantly convert them into structured data formats.</p> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | uploaded_file = st.file_uploader("", type=['png', 'jpg', 'jpeg']) | 
					
						
						|  | st.markdown('</div>', unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if uploaded_file is not None: | 
					
						
						|  | with st.spinner('π Processing your image...'): | 
					
						
						|  | with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file: | 
					
						
						|  | tmp_file.write(uploaded_file.getvalue()) | 
					
						
						|  | temp_path = tmp_file.name | 
					
						
						|  |  | 
					
						
						|  | try: | 
					
						
						|  | image = Image.open(uploaded_file) | 
					
						
						|  | (raw_df, cleaned_df), bbox = process_image(temp_path) | 
					
						
						|  |  | 
					
						
						|  | st.session_state.raw_data = raw_df | 
					
						
						|  | st.session_state.processed_data = cleaned_df | 
					
						
						|  | marked_image = draw_bounding_box(image, bbox[0]) | 
					
						
						|  | st.session_state.marked_image = marked_image | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | col1, col2 = st.columns([0.4, 0.6]) | 
					
						
						|  |  | 
					
						
						|  | with col1: | 
					
						
						|  |  | 
					
						
						|  | st.divider() | 
					
						
						|  | st.markdown('<h3 class="results-header">Detected Table</h3>', unsafe_allow_html=True) | 
					
						
						|  | st.image(marked_image, use_container_width=True) | 
					
						
						|  | st.markdown('</div>', unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  | with col2: | 
					
						
						|  |  | 
					
						
						|  | st.divider() | 
					
						
						|  | st.markdown('<h3 class="results-header">Extracted Data</h3>', unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | tabs = st.tabs(["π Raw Data", "β¨ Enhanced Data β"]) | 
					
						
						|  |  | 
					
						
						|  | with tabs[0]: | 
					
						
						|  | st.dataframe(st.session_state.raw_data, | 
					
						
						|  | use_container_width=True, | 
					
						
						|  | height=600 if not st.session_state.is_expanded else None) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown("### π Copy HTML Table") | 
					
						
						|  | html_raw = st.session_state.raw_data.to_html(index=False) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div style="background-color: #f8fafc; padding: 0.5rem; border-radius: 8px; margin-bottom: 0.5rem;"> | 
					
						
						|  | <p style="margin: 0; color: #475569; font-size: 0.9rem;"> | 
					
						
						|  | βΉοΈ This HTML can be copied and used directly in websites, LLM prompts, or other applications. | 
					
						
						|  | </p> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div style="max-height: 150px; overflow-y: auto; border-radius: 8px;"> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  | st.code(html_raw, language="html") | 
					
						
						|  | st.markdown("</div>", unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  | with tabs[1]: | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div style="background-color: #f0f9ff; padding: 1rem; border-radius: 8px; margin-bottom: 1rem;"> | 
					
						
						|  | <p style="margin: 0; color: #1e40af;"> | 
					
						
						|  | β This is our enhanced version of the table with improved formatting and structure. | 
					
						
						|  | </p> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  | st.dataframe(st.session_state.processed_data, | 
					
						
						|  | use_container_width=True, | 
					
						
						|  | height=600 if not st.session_state.is_expanded else None) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown("### π Copy HTML Table") | 
					
						
						|  | html_enhanced = st.session_state.processed_data.to_html(index=False) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div style="background-color: #f8fafc; padding: 0.5rem; border-radius: 8px; margin-bottom: 0.5rem;"> | 
					
						
						|  | <p style="margin: 0; color: #475569; font-size: 0.9rem;"> | 
					
						
						|  | βΉοΈ This HTML can be copied and used directly in websites, LLM prompts, or other applications. | 
					
						
						|  | </p> | 
					
						
						|  | </div> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  | st.markdown(""" | 
					
						
						|  | <div style="max-height: 150px; overflow-y: auto; border-radius: 8px;"> | 
					
						
						|  | """, unsafe_allow_html=True) | 
					
						
						|  | st.code(html_enhanced, language="html") | 
					
						
						|  | st.markdown("</div>", unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.divider() | 
					
						
						|  | st.markdown('<h3 class="results-header">Download Options</h3>', unsafe_allow_html=True) | 
					
						
						|  | download_cols = st.columns([1, 0.1, 1]) | 
					
						
						|  |  | 
					
						
						|  | def get_csv_download_link(df, filename): | 
					
						
						|  | csv = df.to_csv(index=False).encode() | 
					
						
						|  | b64 = base64.b64encode(csv).decode() | 
					
						
						|  | return f'<a href="data:file/csv;base64,{b64}" download="{filename}" class="download-button">π₯ Download {filename}</a>' | 
					
						
						|  |  | 
					
						
						|  | with download_cols[0]: | 
					
						
						|  | if 'raw_data' in st.session_state: | 
					
						
						|  | csv = st.session_state.raw_data.to_csv(index=False) | 
					
						
						|  | st.download_button( | 
					
						
						|  | label="π₯ Download Raw Data", | 
					
						
						|  | data=csv, | 
					
						
						|  | file_name="raw_data.csv", | 
					
						
						|  | mime="text/csv", | 
					
						
						|  | use_container_width=True, | 
					
						
						|  | key="raw_download" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | with download_cols[2]: | 
					
						
						|  | if 'processed_data' in st.session_state: | 
					
						
						|  | csv = st.session_state.processed_data.to_csv(index=False) | 
					
						
						|  | st.download_button( | 
					
						
						|  | label="π₯ Download Enhanced Data β", | 
					
						
						|  | data=csv, | 
					
						
						|  | file_name="enhanced_data.csv", | 
					
						
						|  | mime="text/csv", | 
					
						
						|  | use_container_width=True, | 
					
						
						|  | key="enhanced_download" | 
					
						
						|  | ) | 
					
						
						|  | st.markdown('</div>', unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  | except Exception as e: | 
					
						
						|  | st.error(f"β Error processing image: {str(traceback.format_exc())}") | 
					
						
						|  |  | 
					
						
						|  | finally: | 
					
						
						|  | try: | 
					
						
						|  | os.unlink(temp_path) | 
					
						
						|  | except Exception as e: | 
					
						
						|  | st.warning(f"β οΈ Error removing temporary file: {str(e)}") | 
					
						
						|  |  |