###======================== Reference ========================###
# Text Detector Model: https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
# LLM Model: OpenAI GPT-5 Mini
# Agentic frameworks: LangChain, LangGraph
# UI: Gradio
###===========================================================###
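### Flow (summary of the pieces referenced above): user text -> detect_text
### (RoBERTa detector) returns label/score -> GPT-5 Mini ReAct agent writes a
### short explanation -> results rendered in the Gradio UI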
import os
from dotenv import load_dotenv
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
### For fetching the AI text detector model (from HF)
from transformers import pipeline
import torch
import gradio as gr
### Clean text for better detection performance
from utils import clean_text
from html_blocks import FEATURES_HTML, NOTE_HTML
from css_blocks import CSS_ELEMENTS
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_MODEL_ID = "gpt-5-mini"
TEXT_DETECTOR_MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model=OPENAI_MODEL_ID)
system_message = SystemMessage("You are a helpful assistant that accurately tells AI-written text from human-written text.")
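### Sample input kept for reference / local testing; it is not wired into the UI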
user_input="""Brooo, you won’t believe what happened today 😭 So I’m just minding my own business, right? Grabbing coffee like a civilized human being, and this barista legit calls out “Caramel macchiato for Michael” — and I’m like “bet, that’s me,” grab it, walk out all confident… only to realize halfway down the street it says soy latte, extra shot, no whip 😭😭 Bro, I straight up stole someone’s drink and been sipping on sadness the whole way home. It was actually kinda fire tho ngl 😅 Anyway, how’s your day been? You still surviving or already plotting your escape from adulthood?
"""
### Load the detector pipeline once at import time so each call to the tool
### doesn't re-initialize the model
classifier = pipeline(
    "text-classification",
    model=TEXT_DETECTOR_MODEL_ID
)

@tool
def detect_text(text: str):
    """Process AI text detection using the fakespot-ai/roberta-base-ai-text-detection-v1 model.
    The underlying pipeline returns its result as a List[Dict],
    e.g. [{'label': 'AI', 'score': 0.9998624324798584}]
    """
    ### Split the text into chunks of at most 300 words when it exceeds that length,
    ### to stay under the max token limit of the detection model
    num_words = 300
    words = text.split()
    if len(words) >= num_words:
        batched_words = []
        for i in range(0, len(words), num_words):
            chunk = " ".join(words[i: i + num_words])
            batched_words.append(chunk)
        ### The pipeline accepts a batch of texts; the first chunk's prediction
        ### is used as the representative result
        results = classifier([clean_text(t) for t in batched_words])
    else:
        results = classifier(clean_text(text))
    label_result = results[0]["label"]
    score_result = results[0]["score"]
    return label_result, score_result
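### Quick usage sketch (hypothetical input, score taken from the docstring example):
### since detect_text is wrapped with @tool, call it via .invoke() outside the agent:
###   detect_text.invoke("Some sample text")  ->  ("AI", 0.9998624324798584)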
def generate_dynamic_query(text, label, score):
    query = f"""
    Detect whether the given input text is AI-written: {text}
    The AI-text likelihood score comes from an open-source detection model and is
    passed in via the tool call as below:
    Label: {label}
    Score: {score:.3f}
    Based on both your own analysis of the input text and the score given by the tool,
    give a final answer within 3-5 lines explaining, in a narrative and descriptive manner,
    why the text is assumed to be human- or AI-written.
    """
    return query
def run_agent(text_input):
    ### Run the detector first and extract the label (AI or Human text) and likelihood score.
    ### The result could be passed to the LLM implicitly through the tool call, but the LLM
    ### did not reliably relay the exact values (for reasons I couldn't figure out),
    ### so they are extracted explicitly here. The trade-off: detect_text effectively runs
    ### twice per request, adding a couple of seconds of latency.
    label, score = detect_text.invoke(text_input)
    query = generate_dynamic_query(text_input, label, score)
    tools = [detect_text]
    ### Known issue: the tool call is redundant here, since detect_text.invoke() above
    ### already extracts everything the LLM needs
    agent_executor = create_react_agent(
        model=llm, tools=tools, prompt=system_message
    )
    result = agent_executor.invoke(
        {"messages": [{"role": "user", "content": query}]}
    )
    ### Format outputs for display
    label = f"🧒 Likely {label}" if label == "Human" else f"🤖 Likely {label}"
    score = f"{score * 100:.2f}%"
    ### Return a 3-tuple: explanation content, text label, and score
    return result["messages"][-1].content, label, score
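### Possible alternative (untested sketch): avoid the double invocation by letting the
### agent call the tool and then reading the tool's raw output back out of the agent's
### message history, e.g.:
###   from langchain_core.messages import ToolMessage
###   tool_msgs = [m for m in result["messages"] if isinstance(m, ToolMessage)]
###   raw = tool_msgs[-1].content  # stringified (label, score) returned by detect_text
### The content arrives as a string, so it would still need parsing before display.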
###================= User Interface =================###
with gr.Blocks(css=CSS_ELEMENTS, title="AI WRITTEN TEXT DETECTOR") as demo:
    gr.Markdown(
        """
        ## 🕵️ AI GENERATED TEXT DETECTOR
        Analyze your text and discover how likely it is to be AI-generated.
        """,
        elem_id="app_title"
    )
    with gr.Column():
        with gr.Row():
            inp = gr.TextArea(placeholder="Paste your text to analyze", label="Text Input", lines=20, scale=2)
            features_desc = gr.HTML(FEATURES_HTML, elem_classes=["features_description"])
        button = gr.Button("Analyze Text", elem_classes=["analyze_btn"])
        gr.HTML(NOTE_HTML)
    with gr.Column():
        with gr.Row():
            label_output = gr.Textbox(
                label="Detected Label",
                placeholder="AI / Human",
                scale=1
            )
            score_output = gr.Textbox(
                label="Confidence Score",
                placeholder="0.000",
                scale=1
            )
        analysis_output = gr.TextArea(
            label="Analysis Result",
            placeholder="Model's explanation will appear here...",
            lines=6
        )
    button.click(
        fn=run_agent,
        inputs=inp,
        outputs=[analysis_output, label_output, score_output]
    )
if __name__ == "__main__":
    ### os.getenv returns a string; server_port expects an int
    port = int(os.getenv("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)