# File size: 5,586 Bytes
# 846f2fe
###======================== Reference ========================###
# Text Detector Model: https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
# LLM Model: OpenAI GPT-5 Mini
# Agentic frameworks: LangChain, LangGraph
# UI: Gradio
###===========================================================###

import os
from urllib.parse import uses_query
from dotenv import load_dotenv

from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

### For fetching the AI text detector model (from HF)
from transformers import pipeline
import torch

import gradio as gr

### Clean text for better detection performance
from utils import clean_text
from html_blocks import FEATURES_HTML
from css_blocks import CSS_ELEMENTS


# Read environment variables (e.g. OPENAI_API_KEY) from a .env file before
# anything below queries the environment.
load_dotenv()

# Credentials and model identifiers shared by the rest of this module.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_MODEL_ID = "gpt-5-mini"
TEXT_DETECTOR_MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"
# Chat model instance handed to the agent built in run_agent().
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model=OPENAI_MODEL_ID)

# System prompt passed to the agent as its `prompt` in run_agent().
system_message = SystemMessage("You are a helpful assistant that most accurately can tell ai written text from human written one.")
# Sample input text; it is not referenced by any code visible in this file.
user_input="""Brooo, you won’t believe what happened today 😭 So I’m just minding my own business, right? Grabbing coffee like a civilized human being, and this barista legit calls out “Caramel macchiato for Michael” — and I’m like “bet, that’s me,” grab it, walk out all confident… only to realize halfway down the street it says soy latte, extra shot, no whip 😭😭 Bro, I straight up stole someone’s drink and been sipping on sadness the whole way home. It was actually kinda fire tho ngl 😅 Anyway, how’s your day been? You still surviving or already plotting your escape from adulthood?
"""


# Shared classifier instance. Building the pipeline loads the model, so it is
# created once on first use instead of on every detect_text call (run_agent
# calls the tool directly and the agent may call it again).
_classifier = None


def _get_classifier():
    """Return the shared text-classification pipeline, creating it on first use."""
    global _classifier
    if _classifier is None:
        _classifier = pipeline(
            "text-classification",
            model=TEXT_DETECTOR_MODEL_ID
        )
    return _classifier


@tool
def detect_text(text):
    """ Process AI text detection using fakespot-ai/roberta-base-ai-text-detection-v1 model.
    It returns its result as a (label, score) pair.
    e.g. ('AI', 0.9998624324798584)

    Args:
        text: The raw text to classify; it is cleaned with clean_text first.

    Returns:
        A tuple of the top predicted label and its score.
    """
    classifier = _get_classifier()

    # truncation=True stops inputs longer than the tokenizer's maximum length
    # from raising inside the pipeline; the overflow is simply not scored.
    predictions = classifier(clean_text(text), truncation=True)
    top_prediction = predictions[0]

    return top_prediction["label"], top_prediction["score"]

def generate_dynamic_query(text, label, score):
    """Build the user prompt that gives the LLM the text and the detector verdict.

    Args:
        text: The text being analyzed; embedded verbatim in the prompt.
        label: The detector's label for the text.
        score: The detector's score; rendered with three decimal places.

    Returns:
        The prompt string: a leading newline followed by lines indented by
        four spaces, ending with an indented empty line.
    """
    indent = " " * 4
    # One entry per prompt line; the empty entries become indent-only lines.
    prompt_lines = (
        f"Detect the text of the given input: {text}  ",
        "AI-text likelihood score is given by another open source llm and the score will be given through the tool call as below:",
        f"Label: {label}",
        f"Score: {score:.3f}",
        "",
        "Based on all those(input text for your own analysis) and the score given by the function,",
        "give the final answer within 3-5 lines why it's assumed to be human or AI written text in a narrative and descriptive manner.",
        "",
    )
    return "\n" + "\n".join(indent + line for line in prompt_lines)


def run_agent(text_input):
    """Classify ``text_input`` and ask the LLM agent to explain the verdict.

    Args:
        text_input: Raw text from the UI text box.

    Returns:
        A 3-tuple ``(analysis, label, score)`` in the order the UI expects:
        the content of the agent's final message, a display label such as
        "🤖 Likely AI", and the detector score formatted as a percentage.
        Blank input returns a short prompt message with empty label and score.
    """
    ### Nothing to analyze: skip both the detector run and the LLM call.
    if not text_input or not text_input.strip():
        return "Please paste some text to analyze.", "", ""

    ### Run the detector up front and put its label and score straight into the prompt,
    ### because the LLM did not reliably pick up the exact values via tool calling alone.
    ### .invoke is the supported way to call a tool object (plain calling is deprecated).
    label, score = detect_text.invoke({"text": text_input})
    query = generate_dynamic_query(text_input, label, score)

    tools = [detect_text]

    ### Known issue: the tool is redundant here, since the label and score are already
    ### in the prompt; the agent may still call detect_text a second time.
    agent_executor = create_react_agent(
        model=llm, tools=tools, prompt=system_message
    )

    result = agent_executor.invoke(
        {"messages": [{"role": "user", "content": query}]}
    )

    ### Process output formats
    prefix = "🧒 Likely " if label == "Human" else "🤖 Likely "
    display_label = prefix + label
    ### No space in the format spec, so the percentage has no leading blank.
    display_score = f"{score * 100:.2f}%"

    ### Return 3 elements in a tuple: content, text label and score
    return result["messages"][-1].content, display_label, display_score


###================= User Interface =================###

# Build the Gradio page: a title, an input row, an "Analyze Text" button and
# three output fields that are filled from run_agent's return value.
with gr.Blocks(css=CSS_ELEMENTS, title="AI WRITTEN TEXT DETECTOR") as demo:
    gr.Markdown(
        """
        ## 🕵️ AI WRITTEN TEXT DETECTOR
        Analyze your text and discover how likely it is to be AI-generated.
        """,
        elem_id="app_title"
    )
    # Input area: the text box next to the static features description.
    with gr.Column():
        with gr.Row():
            inp = gr.TextArea(placeholder="Paste your text to analyze", label="Text Input", lines=20, scale=2)
            features_desc = gr.HTML(FEATURES_HTML, elem_classes=["features_description"])

        button = gr.Button("Analyze Text", elem_classes=["analyze_btn"])

    # Output area: label and score side by side, explanation underneath.
    with gr.Column():
        with gr.Row():
            label_output = gr.Textbox(
                label="Detected Label",
                placeholder="AI / Human",
                scale=1
            )
            score_output = gr.Textbox(
                label="Confidence Score",
                placeholder="0.000",
                scale=1
            )

        analysis_output = gr.TextArea(
            label="Analysis Result",
            placeholder="Model's explanation will appear here...",
            lines=6
        )

    # run_agent returns (analysis, label, score); the outputs list follows
    # that same order.
    button.click(
        fn=run_agent,
        inputs=inp,
        outputs=[analysis_output, label_output, score_output]
    )

if __name__ == "__main__":
    # os.getenv returns a string whenever PORT is set, but server_port must be
    # an integer, so convert explicitly; 7860 is the fallback when PORT is unset.
    port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=port)