# (Hugging Face Spaces page-header residue from scrape — Space status: Running)
| # app.py - 文本检测多模型集成系统 | |
| import gradio as gr | |
| from transformers import pipeline | |
| import numpy as np | |
| import re | |
# Registry of ensemble detector models.
# Each entry holds the Hugging Face model id, a lazily-loaded
# text-classification pipeline (None until/unless loading succeeds),
# and the model's voting weight in the ensemble (weights sum to 1.0).
models = {
    "model1": {
        "name": "Xenova/distilbert-base-ai-generated-text-detection",
        "detector": None,
        "weight": 0.4,
    },
    "model2": {
        "name": "Hello-SimpleAI/chatgpt-detector-roberta",
        "detector": None,
        "weight": 0.3,
    },
    "model3": {
        "name": "roberta-base-openai-detector",
        "detector": None,
        "weight": 0.3,
    },
}

# Initialize every detector at import time. A failed load simply leaves
# "detector" as None so the ensemble degrades gracefully instead of
# crashing the whole app. (Iterate .items() rather than re-indexing
# models[key] on every access; the redundant "detector = None"
# reassignment in the except branch was dropped — it is already None.)
for _key, _info in models.items():
    try:
        _info["detector"] = pipeline("text-classification", model=_info["name"])
        print(f"成功加载模型: {_info['name']}")
    except Exception as e:
        print(f"加载模型 {_info['name']} 失败: {str(e)}")
def analyze_text_features(text):
    """Compute surface-level statistics of *text*.

    Args:
        text: Input string to analyze (may be empty).

    Returns:
        dict with keys: length, word_count, avg_word_length,
        unique_words_ratio, sentence_count, avg_sentence_length,
        punctuation_ratio, and (only when the text has words)
        lexical_diversity. All divisions are guarded against empty input.
    """
    features = {}
    features["length"] = len(text)

    # Word-level statistics (whitespace tokenization).
    words = text.split()
    features["word_count"] = len(words)
    features["avg_word_length"] = sum(len(word) for word in words) / max(1, len(words))
    features["unique_words_ratio"] = len(set(text.lower().split())) / max(1, len(words))

    # Sentence analysis. Bug fix: re.split leaves empty fragments around
    # terminators (e.g. "Hi." -> ["Hi", ""]), which previously inflated
    # sentence_count and skewed avg_sentence_length; drop them.
    sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
    features["sentence_count"] = len(sentences)
    features["avg_sentence_length"] = sum(len(s.split()) for s in sentences) / max(1, len(sentences))

    # Lexical diversity (type/token ratio), only defined for non-empty text.
    if len(words) > 0:
        features["lexical_diversity"] = len(set(words)) / len(words)

    # Share of characters that are punctuation.
    punctuation_count = sum(1 for char in text if char in ",.!?;:\"'()[]{}")
    features["punctuation_ratio"] = punctuation_count / max(1, len(text))
    return features
def detect_ai_text(text):
    """Ensemble AI-text detection: weighted vote over all loaded models.

    Args:
        text: Candidate text; must be at least 50 characters after stripping.

    Returns:
        dict with "ai_probability", "confidence_level",
        "individual_model_results" and "features" on success, or a dict
        with an "error" key when the text is too short or no detector
        model is usable.
    """
    if not text or len(text.strip()) < 50:
        return {"error": "文本太短,无法可靠检测"}

    results = {}
    valid_models = 0
    weighted_ai_probability = 0.0
    # Bug fix: accumulate only the weight of models that actually produced
    # a prediction. Previously the denominator counted every *loaded* model,
    # so a model that failed at inference time silently deflated the
    # ensemble probability toward 0.
    used_weight = 0.0

    for key, model_info in models.items():
        if model_info["detector"] is None:
            continue
        try:
            result = model_info["detector"](text)
            label = result[0]["label"]
            score = result[0]["score"]
            # Map the model-specific label to an "AI generated" probability.
            # Labels vary per model; anything mentioning ai/chatgpt/generated
            # is treated as the AI class, otherwise the score is inverted.
            if "ai" in label.lower() or "chatgpt" in label.lower() or "generated" in label.lower():
                ai_probability = score
            else:
                ai_probability = 1 - score
            results[key] = {
                "model_name": model_info["name"],
                "ai_probability": ai_probability,
                "label": label,
                "score": score,
            }
            weighted_ai_probability += ai_probability * model_info["weight"]
            used_weight += model_info["weight"]
            valid_models += 1
        except Exception as e:
            results[key] = {
                "model_name": model_info["name"],
                "error": str(e),
            }

    # Bug fix: with zero usable models the old code computed 0/1 = 0.0 and
    # confidently reported "高概率人类创作"; report an explicit error instead.
    if valid_models == 0:
        return {
            "error": "没有可用的检测模型",
            "individual_model_results": results,
        }

    final_ai_probability = weighted_ai_probability / used_weight

    text_features = analyze_text_features(text)

    # Map the ensemble probability to a coarse confidence label.
    if final_ai_probability > 0.7:
        confidence_level = "高概率AI生成"
    elif final_ai_probability < 0.3:
        confidence_level = "高概率人类创作"
    else:
        confidence_level = "无法确定"

    return {
        "ai_probability": final_ai_probability,
        "confidence_level": confidence_level,
        "individual_model_results": results,
        "features": text_features,
    }
# Build and launch the Gradio UI that exposes detect_ai_text as a web API.
# The example entry is long enough (>= 50 chars) to pass the length guard.
demo_examples = [
    ["这是一段示例文本,用于测试AI文本检测功能。请输入至少50个字符的文本以获得准确的检测结果。"]
]

iface = gr.Interface(
    fn=detect_ai_text,
    inputs=gr.Textbox(lines=10, placeholder="粘贴要检测的文本..."),
    outputs=gr.JSON(),
    title="增强型AI文本检测API",
    description="多模型集成检测文本是否由AI生成",
    examples=demo_examples,
    allow_flagging="never",
)
iface.launch()