# Hugging Face Spaces page residue (commented out so the file is valid Python):
# Spaces:
# Sleeping
# Sleeping
# Third-party dependencies: gradio (web UI), requests (HTTP fetch),
# bs4 (HTML parsing), transformers (model inference).
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Load fast, open-access model
# Created once at import time so every request reuses the same weights.
llm = pipeline("text2text-generation", model="google/flan-t5-base")
def extract_text(url):
    """Fetch *url* and return the page's visible text, one text node per line.

    Raises:
        requests.HTTPError: on 4xx/5xx responses — fail fast instead of
            silently parsing and "refining" an error page (the original
            skipped this check).
        requests.RequestException: on network failure/timeout.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # bug fix: don't treat 404/500 bodies as content
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.get_text(separator="\n")
def chunk_text(text, chunk_size=3000):
    """Split *text* into consecutive slices of at most *chunk_size* characters."""
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        pieces.append(text[start:start + chunk_size])
        start += chunk_size
    return pieces
def refine_chunk(chunk, instruction):
    """Send *instruction* plus *chunk* through the Flan-T5 pipeline and
    return the generated text."""
    # NOTE(review): flan-t5-base has a ~512-token input limit, so a
    # 3000-char chunk is likely truncated by the tokenizer — confirm
    # this is acceptable for the use case.
    prompt = "\n".join(["", instruction, "Content:", chunk, ""])
    outputs = llm(prompt, max_new_tokens=512)
    return outputs[0]["generated_text"]
def streamed_pipeline(url, instruction):
    """Crawl *url*, split the page text into chunks, refine each chunk with
    the LLM, and stream the accumulated markdown output.

    Yields the full output-so-far on every step: Gradio *replaces* (does
    not append) the output component's value on each yield from a
    generator, so the original code — which yielded only the newest
    section — erased every previous section from the UI.
    """
    try:
        raw_text = extract_text(url)
        chunks = chunk_text(raw_text)
        output = ""
        for i, chunk in enumerate(chunks):
            result = refine_chunk(chunk, instruction)
            output += f"### Section {i+1}\n{result}\n\n"
            yield output
    except Exception as e:
        # Top-level UI boundary: surface any failure (network, parsing,
        # model) in the output box instead of crashing the app.
        yield f"Error: {str(e)}"
# Gradio UI: two text inputs (URL + instruction), one streaming text output.
# Because fn is a generator, Gradio streams its successive yields into the
# output box. (Labels/titles contain mojibake — "π" — left byte-identical
# since they are runtime strings.)
demo = gr.Interface(
    fn=streamed_pipeline,
    inputs=[
        gr.Textbox(label="π Enter Webpage URL"),
        gr.Textbox(label="π§ Instruction", placeholder="e.g. Clean and format this for GPT2 training")
    ],
    outputs=gr.Textbox(label="π Streaming Output", lines=40, max_lines=80, interactive=False),
    title="π§ Real-Time Chunked Refiner",
    description="Crawls full webpage, breaks into chunks, and streams refined output section-by-section using Flan-T5."
)

# Launch the local web server only when run as a script (Spaces imports it).
if __name__ == "__main__":
    demo.launch()