ghosthets committed on
Commit
621b454
·
verified ·
1 Parent(s): 76ac794

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -3,15 +3,15 @@ import requests
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
 
6
- # Load Mixtral model (via Transformers pipeline)
7
- refiner = pipeline("text2text-generation", model="mistralai/Mixtral-8x7B-Instruct-v0.1")
8
 
9
  def refine_from_url(url, instruction):
10
  try:
11
  response = requests.get(url, timeout=5)
12
  soup = BeautifulSoup(response.text, "html.parser")
13
  raw_text = soup.get_text(separator="\n")
14
- prompt = f"{instruction}\n\n{raw_text[:4000]}" # truncate for token limit
15
  output = refiner(prompt, max_new_tokens=512)[0]["generated_text"]
16
  return output
17
  except Exception as e:
@@ -24,8 +24,8 @@ demo = gr.Interface(
24
  gr.Textbox(label="Refinement Instruction", placeholder="e.g. Clean and structure this for AI training")
25
  ],
26
  outputs=gr.Textbox(label="Refined Output"),
27
- title="🔍 Data Refiner with Mixtral",
28
- description="Crawl any webpage and refine its content using Mixtral 8x7B for AI training or research."
29
  )
30
 
31
  if __name__ == "__main__":
 
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
 
6
+ # Use open-access model
7
+ refiner = pipeline("text2text-generation", model="google/flan-t5-large")
8
 
9
  def refine_from_url(url, instruction):
10
  try:
11
  response = requests.get(url, timeout=5)
12
  soup = BeautifulSoup(response.text, "html.parser")
13
  raw_text = soup.get_text(separator="\n")
14
+ prompt = f"{instruction}\n\n{raw_text[:3000]}" # truncate for token limit
15
  output = refiner(prompt, max_new_tokens=512)[0]["generated_text"]
16
  return output
17
  except Exception as e:
 
24
  gr.Textbox(label="Refinement Instruction", placeholder="e.g. Clean and structure this for AI training")
25
  ],
26
  outputs=gr.Textbox(label="Refined Output"),
27
+ title="🧠 Data Refiner with Flan-T5",
28
+ description="Crawl any webpage and refine its content using Flan-T5 for AI training or research."
29
  )
30
 
31
  if __name__ == "__main__":