datarefine2

Sleeping

App Files Files Community

ghosthets committed on Sep 13

Commit

76ac794

verified ·

1 Parent(s): 9a7281b

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -42

app.py CHANGED Viewed

@@ -1,52 +1,32 @@
-import os
-import json
 import gradio as gr
-from fastmcp import FastMCP
-from huggingface_hub import HfApi, model_info
-# Hugging Face token from Space secrets
-HF_TOKEN = os.environ.get("HF_TOKEN")
-hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
-# FastMCP server setup
-mcp = FastMCP("hf-tagging-bot")
-@mcp.tool()
-def get_current_tags(repo_id: str) -> str:
-    """Get current tags from a HuggingFace model repository"""
-    if not hf_api:
-        return json.dumps({"error": "HF token not configured"})
     try:
-        info = model_info(repo_id=repo_id, token=HF_TOKEN)
-        current_tags = info.tags if info.tags else []
-        return json.dumps({
-            "status": "success",
-            "repo_id": repo_id,
-            "current_tags": current_tags,
-            "count": len(current_tags),
-        })
     except Exception as e:
-        return json.dumps({
-            "status": "error",
-            "repo_id": repo_id,
-            "error": str(e),
-        })
-# Gradio UI wrapper
-def gradio_tag_checker(repo_id):
-    return get_current_tags(repo_id)
 demo = gr.Interface(
-    fn=gradio_tag_checker,
-    inputs=gr.Textbox(label="Enter HuggingFace Model Repo ID", placeholder="e.g. bert-base-uncased"),
-    outputs=gr.Textbox(label="Current Tags (JSON)"),
-    title="🔖 HuggingFace Tag Checker",
-    description="Uses FastMCP + HuggingFace Hub SDK to fetch current tags from any model repo."
 )
-# Run both FastMCP and Gradio safely
 if __name__ == "__main__":
-    import threading
-    threading.Thread(target=mcp.run, daemon=True).start()
-    demo.launch(quiet=True, show_error=False)  # ✅ Prevent uvicorn logging crash

 import gradio as gr
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+# Load Mixtral model (via Transformers pipeline)
+refiner = pipeline("text2text-generation", model="mistralai/Mixtral-8x7B-Instruct-v0.1")
+def refine_from_url(url, instruction):
     try:
+        response = requests.get(url, timeout=5)
+        soup = BeautifulSoup(response.text, "html.parser")
+        raw_text = soup.get_text(separator="\n")
+        prompt = f"{instruction}\n\n{raw_text[:4000]}"  # truncate for token limit
+        output = refiner(prompt, max_new_tokens=512)[0]["generated_text"]
+        return output
     except Exception as e:
+        return f"Error: {str(e)}"
 demo = gr.Interface(
+    fn=refine_from_url,
+    inputs=[
+        gr.Textbox(label="Enter URL"),
+        gr.Textbox(label="Refinement Instruction", placeholder="e.g. Clean and structure this for AI training")
+    ],
+    outputs=gr.Textbox(label="Refined Output"),
+    title="🔍 Data Refiner with Mixtral",
+    description="Crawl any webpage and refine its content using Mixtral 8x7B for AI training or research."
 )
 if __name__ == "__main__":
+    demo.launch()