"""EduHTML Creator: Gradio app that turns a document into an educational HTML page.

The document can be pasted text, a URL, or an uploaded file. Its text is
inserted into a prompt template and sent to the selected chat model; the
model's answer is saved as an ``.html`` file and previewed in the UI.
"""

import io
import os
import re
import tempfile
import time
from datetime import datetime
from urllib.parse import urlparse

import gradio as gr
import pandas as pd
import requests
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from langchain_core.caches import BaseCache
from langchain_core.callbacks import Callbacks

# Import DocLing and necessary configuration classes
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.base_models import InputFormat

# Import and rebuild ChatGoogleGenerativeAI deferred.
# The rebuild is attempted only here, inside the guard, so a failure is
# reported as a warning instead of aborting the import of this module.
try:
    from langchain_google_genai import ChatGoogleGenerativeAI
    from langchain_core.caches import BaseCache
    ChatGoogleGenerativeAI.model_rebuild()
except Exception as e:
    print(f"Warning during rebuild: {e}")
    from langchain_google_genai import ChatGoogleGenerativeAI

# --- START OF OCR CONFIGURATION ---
# Create a single, pre-configured DocumentConverter instance to be reused.
# This is more efficient than creating it on every function call.

# 1. Define the pipeline options to enable OCR for PDFs.
# Configure a single global DocLing converter with Tesseract OCR enabled and all languages
# Note: With tesseract-ocr-all installed, all language data files are available.
# NOTE(review): the keyword names below (ocr_model, ocr_languages) are kept as
# written; confirm them against the installed DocLing version.
pdf_options = PdfPipelineOptions(
    do_ocr=True,
    ocr_model="tesseract",
    # Provide a broad default set. With tesseract-ocr-all, many language packs exist.
    # You can keep this small for speed or expand it. Here we include a practical wide set.
    ocr_languages=[
        "eng", "fra", "deu", "spa", "ita", "por", "nld", "pol", "tur", "ces", "rus", "ukr", "ell", "ron", "hun",
        "bul", "hrv", "srp", "slk", "slv", "lit", "lav", "est", "cat", "eus", "glg", "isl", "dan", "nor", "swe",
        "fin", "alb", "mlt", "afr", "zul", "swa", "amh", "uzb", "aze", "kaz", "kir", "mon", "tgl", "ind", "msa",
        "tha", "vie", "khm", "lao", "mya", "ben", "hin", "mar", "guj", "pan", "mal", "tam", "tel", "kan", "nep",
        "sin", "urd", "fas", "pus", "kur", "aze_cyrl", "tat", "uig", "heb", "ara", "yid", "grc", "chr", "epo",
        "hye", "kat", "kat_old", "aze_latn", "mkd", "bel", "srp_latn", "srp_cyrillic",
        # CJK — these are heavier and slower; include only if needed:
        "chi_sim", "chi_tra", "jpn", "kor",
    ],
)

# 2. Create the format-specific configuration.
format_options = {
    InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_options)
}

# 3. Initialize the converter with the OCR configuration.
# This converter will now automatically perform OCR on any PDF file.
docling_converter = DocumentConverter(format_options=format_options)
# --- END OF OCR CONFIGURATION ---

# Model configuration: UI label -> provider, LangChain chat class and model id.
# NOTE(review): the labels and the "model_name" ids do not match (for example
# both Gemini entries use "gemini-2.0-flash-exp") — confirm the intended ids.
MODELS = {
    "Gemini 2.5 Flash (Google AI)": {
        "provider": "Google AI",
        "class": ChatGoogleGenerativeAI,
        "model_name": "gemini-2.0-flash-exp",
        "default_api": True
    },
    "ChatGPT 5 (OpenAI)": {
        "provider": "OpenAI",
        "class": ChatOpenAI,
        "model_name": "gpt-4o",
        "default_api": False
    },
    "Claude Sonnet 4 (Anthropic)": {
        "provider": "Anthropic",
        "class": ChatAnthropic,
        "model_name": "claude-3-5-sonnet-20241022",
        "default_api": False
    },
    "Gemini 2.5 Pro (Google AI)": {
        "provider": "Google AI",
        "class": ChatGoogleGenerativeAI,
        "model_name": "gemini-2.0-flash-exp",
        "default_api": False
    }
}

# Default API for Gemini 2.5 Flash via HF Spaces Secrets
DEFAULT_GEMINI_API = os.getenv("FLASH_GOOGLE_API_KEY")

# Label of the only model that may be used without a user-supplied API key.
_DEFAULT_MODEL_LABEL = "Gemini 2.5 Flash (Google AI)"

# Prefixes of the error strings returned by the extract_* helpers below.
# generate_html uses them to avoid sending an error message to the LLM as if
# it were the document.
_EXTRACTION_ERROR_PREFIXES = (
    "Error reading file:",
    "Error retrieving URL:",
    "Unsupported file format",
)


def _convert_with_docling(path):
    """Run the shared DocLing converter on *path* and return Markdown text."""
    result = docling_converter.convert(path)
    return result.document.export_to_markdown()


def extract_text_from_file(file):
    """
    Extract text from an uploaded file or path (str).

    - Accepts an object with .name attribute (e.g. Gradio upload) OR a file path (str).
    - DocLing for: .pdf (Tesseract OCR enabled if configured), .docx, .xlsx, .pptx
    - Converts .csv /.xls -> temporary .xlsx then DocLing
    - .txt read directly

    Returns the extracted text, "" when *file* is None, "Unsupported file
    format" for other extensions, or "Error reading file: ..." on failure.
    """
    if file is None:
        return ""

    # Normalize to a filesystem path string
    path = file.name if hasattr(file, "name") else str(file)
    ext = os.path.splitext(path)[1].lower()

    docling_direct = {".pdf", ".docx", ".xlsx", ".pptx"}
    to_xlsx_first = {".csv", ".xls"}

    try:
        if ext in docling_direct:
            return _convert_with_docling(path)

        if ext in to_xlsx_first:
            # Convert CSV/XLS -> XLSX
            frame = pd.read_csv(path) if ext == ".csv" else pd.read_excel(path)
            # Write into a temporary directory rather than into an open
            # NamedTemporaryFile: re-opening such a file by name fails on
            # Windows. The directory is removed once the text is extracted.
            with tempfile.TemporaryDirectory() as tmp_dir:
                xlsx_path = os.path.join(tmp_dir, "converted.xlsx")
                frame.to_excel(xlsx_path, index=False)
                return _convert_with_docling(xlsx_path)

        if ext == ".txt":
            with open(path, "r", encoding="utf-8") as f:
                return f.read()

        return "Unsupported file format"
    except Exception as e:
        return f"Error reading file: {str(e)}"


def extract_text_from_url(url):
    """Extract text from a URL.

    Downloads the page, removes anything that looks like an HTML tag,
    collapses whitespace and returns at most the first 10,000 characters.
    On failure returns "Error retrieving URL: ...".
    """
    # NOTE(review): the URL comes from the user and is fetched server-side
    # without restricting the target host — confirm this is acceptable.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        content = response.text
        content = re.sub(r'<[^>]+>', '', content)
        content = re.sub(r'\s+', ' ', content).strip()
        return content[:10000]  # Limit to 10k characters
    except Exception as e:
        return f"Error retrieving URL: {str(e)}"


def get_document_content(text_input, url_input, file_input):
    """Retrieve document content based on source.

    Priority: pasted text, then URL, then uploaded file. Returns "" when no
    source is provided. ``None`` text/URL values are treated as empty.
    """
    pasted = (text_input or "").strip()
    if pasted:
        return pasted

    link = (url_input or "").strip()
    if link:
        return extract_text_from_url(link)

    if file_input is not None:
        return extract_text_from_file(file_input)

    return ""


def create_llm_instance(model_name, api_key):
    """Create an LLM model instance.

    *model_name* is a key of ``MODELS``. For the "Google AI" provider an empty
    *api_key* falls back to ``DEFAULT_GEMINI_API``.

    Raises:
        KeyError: if *model_name* is not in ``MODELS``.
        ValueError: if the configured provider is not handled here.
    """
    model_config = MODELS[model_name]
    provider = model_config["provider"]
    llm_class = model_config["class"]

    if provider in ("OpenAI", "Anthropic"):
        return llm_class(
            model=model_config["model_name"],
            api_key=api_key,
            temperature=0.7
        )

    if provider == "Google AI":
        api_to_use = api_key if api_key else DEFAULT_GEMINI_API
        return llm_class(
            model=model_config["model_name"],
            google_api_key=api_to_use,
            temperature=0.7
        )

    # Previously this fell through and returned None, which only failed later
    # with an unrelated AttributeError.
    raise ValueError(f"Unsupported provider: {provider}")


def _message_text(content):
    """Return the text of a chat response whose content is a str or a list of parts."""
    if isinstance(content, str):
        return content
    parts = []
    for part in content:
        if isinstance(part, str):
            parts.append(part)
        elif isinstance(part, dict) and isinstance(part.get("text"), str):
            parts.append(part["text"])
    return "".join(parts)


def generate_html(model_name, api_key, text_input, url_input, file_input):
    """Generate educational HTML file.

    Returns a tuple ``(file_path, status_message, generation_time)``.
    ``file_path`` is None and ``generation_time`` is 0 when generation did not
    happen (missing key, missing document, extraction error, or LLM failure).
    """
    start_time = time.time()
    api_key = (api_key or "").strip()

    if model_name != _DEFAULT_MODEL_LABEL and not api_key:
        return None, "❌ Error: Please provide an API key for this model.", 0

    document_content = get_document_content(text_input, url_input, file_input)
    if not document_content:
        return None, "❌ Error: Please provide a document (text, URL or file).", 0

    # The extract_* helpers report failures as plain strings. When the content
    # did not come from pasted text, surface such a failure to the user
    # instead of asking the model to write a page about an error message.
    came_from_pasted_text = bool((text_input or "").strip())
    if not came_from_pasted_text and document_content.startswith(_EXTRACTION_ERROR_PREFIXES):
        return None, f"❌ Error: {document_content}", 0

    try:
        # Create LLM instance
        llm = create_llm_instance(model_name, api_key)

        # Read prompt template
        with open("creation_educational_html_from_any_document_18082025.txt", "r", encoding="utf-8") as f:
            prompt_template = f.read()

        # Replace variables
        model_config = MODELS[model_name]
        prompt = prompt_template.format(
            model_name=model_config["model_name"],
            provider_name=model_config["provider"],
            document=document_content
        )

        # Generate content
        message = HumanMessage(content=prompt)
        response = llm.invoke([message])
        html_content = _message_text(response.content)

        # Clean any code tags from models
        html_content = html_content.replace("```html", "")
        html_content = html_content.replace("```", "")

        # Calculate generation time
        generation_time = time.time() - start_time

        # Save HTML file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"educational_document_{timestamp}.html"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(html_content)

        success_message = f"✅ HTML file generated successfully in {generation_time:.2f} seconds!"
        return filename, success_message, generation_time

    except Exception as e:
        error_message = f"❌ Error during generation: {str(e)}"
        return None, error_message, 0


def reset_form():
    """Reset the form to zero.

    Returns one value per component wired to the Reset button, in order.
    """
    return (
        "Gemini 2.5 Flash (Google AI)",  # model_name
        "",  # api_key
        "",  # text_input
        "",  # url_input
        None,  # file_input
        "",  # status_message
        None,  # html_file
        ""  # html_preview
    )


def update_api_info(model_name):
    """Update API information based on selected model."""
    if model_name == "Gemini 2.5 Flash (Google AI)":
        return gr.update(
            label="API Key (optional)",
            placeholder="Free API available until exhausted, or use your own key",
            info="💡 A free API is already configured for this model. You can use your own key if you wish."
        )
    return gr.update(
        label="API Key (required)",
        placeholder="Enter your API key",
        info="🔑 API key required for this model"
    )


def _show_result(file, status, _generation_time):
    """Reveal the download and preview components once a file was generated.

    *file* is the value of the file output: None when generation failed,
    otherwise a path string or an object exposing the path as ``.name``.
    """
    if not file:
        return gr.update(visible=False), status, gr.update(visible=False, value="")

    path = getattr(file, "name", file)
    # Read with a context manager so the handle is closed right away.
    with open(path, "r", encoding="utf-8") as f:
        preview = f.read()
    return gr.update(visible=True), status, gr.update(visible=True, value=preview)


# Gradio Interface (Apple-like)
# NOTE(review): the markup inside the gr.HTML(...) literals below appears to
# have been stripped from this copy of the file; the literals are reproduced
# as they appear here. The nesting of the layout is likewise reconstructed —
# confirm both against the deployed version.
with gr.Blocks(
    title="EduHTML Creator - Educational HTML Content Generator",
    theme=gr.themes.Soft(),
    css="style.css",
    js="script.js"
) as app:
    # Header hero (black, full-width look within container)
    gr.HTML(""" """)

    with gr.Column(elem_classes=["main-container"]):
        # Model Configuration Section
        gr.HTML("\n")
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value="Gemini 2.5 Flash (Google AI)",
            label="LLM Model",
            info="Select the model to use for generation"
        )
        api_input = gr.Textbox(
            label="API Key (optional)",
            placeholder="Free API (Gemini Flash) available. You can enter your own key.",
            info="For OpenAI/Anthropic, a key is required.",
            type="password"
        )
        gr.HTML("\n")

        # Document Source Section with tabs
        gr.HTML("\n")
        gr.HTML("\n\nDocument Source\n\n")
        with gr.Tabs():
            with gr.TabItem("📝 Text"):
                text_input = gr.Textbox(
                    label="Copied/pasted text",
                    placeholder="Paste your text here...",
                    lines=4
                )
            with gr.TabItem("🌐 URL"):
                url_input = gr.Textbox(
                    label="Web Link",
                    placeholder="https://example.com/article"
                )
            with gr.TabItem("📁 File"):
                # ".csv" is listed because extract_text_from_file handles it.
                file_input = gr.File(
                    label="File",
                    file_types=[".pdf", ".txt", ".docx", ".xlsx", ".xls", ".csv", ".pptx"]
                )
        gr.HTML("\n")

        # Action buttons
        with gr.Row():
            submit_btn = gr.Button("Generate HTML", variant="primary", elem_classes=["apple-button"])
            reset_btn = gr.Button("Reset", elem_classes=["reset-button"])

        # Results Section
        status_output = gr.HTML(label="Status")
        gr.HTML("\n")
        gr.HTML("\nPreview\n")
        html_preview = gr.HTML(label="Preview", visible=False, elem_id="html-preview", elem_classes=["preview-body"])
        html_file_output = gr.File(label="Downloadable HTML file", visible=False)
        gr.HTML("\n")

    # Footer (black)
    gr.HTML(""" """)

    # Holds the generation time returned by generate_html; it is not displayed.
    generation_time_state = gr.State()

    # Events
    model_dropdown.change(
        fn=update_api_info,
        inputs=[model_dropdown],
        outputs=[api_input]
    )

    submit_btn.click(
        fn=generate_html,
        inputs=[model_dropdown, api_input, text_input, url_input, file_input],
        outputs=[html_file_output, status_output, generation_time_state]
    ).then(
        fn=_show_result,
        inputs=[html_file_output, status_output, generation_time_state],
        outputs=[html_file_output, status_output, html_preview]
    )

    reset_btn.click(
        fn=reset_form,
        outputs=[model_dropdown, api_input, text_input, url_input, file_input, status_output, html_file_output, html_preview]
    )

if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )