###======================== Reference ========================###
# Text Detector Model: https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
# LLM Model: OpenAI GPT-5 Mini
# Agentic frameworks: LangChain, LangGraph
# UI: Gradio
###===========================================================###
import os
from dotenv import load_dotenv
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
### For fetching the AI text detector model (from HF)
from transformers import pipeline
import torch
import gradio as gr
### Clean text for better detection performance
from utils import clean_text
from html_blocks import FEATURES_HTML
from css_blocks import CSS_ELEMENTS
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_MODEL_ID = "gpt-5-mini"
TEXT_DETECTOR_MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model=OPENAI_MODEL_ID)
system_message = SystemMessage("You are a helpful assistant that can accurately tell AI-written text from human-written text.")
### Sample input for manual testing (not wired into the Gradio UI)
user_input = """Brooo, you won’t believe what happened today 😭 So I’m just minding my own business, right? Grabbing coffee like a civilized human being, and this barista legit calls out “Caramel macchiato for Michael” — and I’m like “bet, that’s me,” grab it, walk out all confident… only to realize halfway down the street it says soy latte, extra shot, no whip 😭😭 Bro, I straight up stole someone’s drink and been sipping on sadness the whole way home. It was actually kinda fire tho ngl 😅 Anyway, how’s your day been? You still surviving or already plotting your escape from adulthood?
"""
### Build the classifier once at import time so the model is not reloaded on every call.
### `device` selects the first GPU when available, otherwise the CPU.
classifier = pipeline(
    "text-classification",
    model=TEXT_DETECTOR_MODEL_ID,
    device=0 if torch.cuda.is_available() else -1,
)

@tool
def detect_text(text: str) -> tuple[str, float]:
    """Run AI-text detection with the fakespot-ai/roberta-base-ai-text-detection-v1 model.

    The pipeline returns a List[Dict], e.g. [{'label': 'AI', 'score': 0.9998624324798584}];
    this tool unpacks it and returns a (label, score) tuple.
    """
    predictions = classifier(clean_text(text))
    label = predictions[0]["label"]
    score = predictions[0]["score"]
    return label, score
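### Example usage (hypothetical value, shaped like the docstring sample above):
###   detect_text.invoke("Paste any paragraph here")
###   -> ("AI", 0.9998624324798584)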
def generate_dynamic_query(text, label, score):
    query = f"""
    Classify the given input text: {text}
    An open-source detector model has already estimated the AI-text likelihood; its result is provided below:
    Label: {label}
    Score: {score:.3f}
    Based on both your own analysis of the input text and the detector's result,
    give a final answer within 3-5 lines explaining, in a narrative and descriptive manner, why the text is assumed to be human- or AI-written.
    """
    return query
def run_agent(text_input):
    ### Run the detector first and explicitly extract the label (AI or Human) and the likelihood score.
    ### The values could be passed to the LLM purely through tool calling, but the LLM did not
    ### reliably relay the exact numbers from the tool output, so detect_text ends up running twice,
    ### which adds a couple of seconds of latency.
    label, score = detect_text.invoke(text_input)
    query = generate_dynamic_query(text_input, label, score)
    tools = [detect_text]
    ### Known issue: the tool call is redundant here, since detect_text.invoke() above already
    ### extracts everything the LLM needs.
agent_executor = create_react_agent(
model=llm, tools=tools, prompt=system_message
)
result = agent_executor.invoke(
{"messages": [{"role": "user", "content": query}]}
)
    ### Format the outputs for display
    label = f"🧒 Likely {label}" if label == "Human" else f"🤖 Likely {label}"
    score = f"{score * 100:.2f}%"
    ### Return a 3-tuple: analysis content, text label, and score
    return result["messages"][-1].content, label, score
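### Example shape of the return value (hypothetical numbers; the analysis text comes from the LLM):
###   run_agent(user_input) -> ("<3-5 line explanation>", "🧒 Likely Human", "97.31%")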
###================= User Interface =================###
with gr.Blocks(css=CSS_ELEMENTS, title="AI WRITTEN TEXT DETECTOR") as demo:
gr.Markdown(
"""
## 🕵️ AI WRITTEN TEXT DETECTOR
Analyze your text and discover how likely it is to be AI-generated.
""",
elem_id="app_title"
)
with gr.Column():
with gr.Row():
inp = gr.TextArea(placeholder="Paste your text to analyze", label="Text Input", lines=20, scale=2)
features_desc = gr.HTML(FEATURES_HTML, elem_classes=["features_description"])
button = gr.Button("Analyze Text", elem_classes=["analyze_btn"])
with gr.Column():
with gr.Row():
label_output = gr.Textbox(
label="Detected Label",
placeholder="AI / Human",
scale=1
)
score_output = gr.Textbox(
label="Confidence Score",
placeholder="0.000",
scale=1
)
analysis_output = gr.TextArea(
label="Analysis Result",
placeholder="Model's explanation will appear here...",
lines=6
)
button.click(
fn=run_agent,
inputs=inp,
outputs=[analysis_output, label_output, score_output]
)
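### Optional smoke test with the sample message above (prints the raw 3-tuple):
# print(run_agent(user_input))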
if __name__ == "__main__":
    ### PORT comes from the environment as a string, so cast it to int for Gradio.
    port = int(os.getenv("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)