# advanced_tools.py
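"""CPU-only NLP helper tools for the mcp-cpu-madness demo: sentiment analysis,
named-entity recognition, sentence embeddings, semantic similarity, and a
simple in-memory cache. Models are lazy-loaded on first use to keep startup cheap."""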
from typing import Dict
from transformers import pipeline  # type: ignore[import]
from sentence_transformers import SentenceTransformer
class AdvancedTools:
    def __init__(self):
        """Initialize containers; models are lazy-loaded on first use."""
        self.models = {}
        self.embedder = None
        self.cache = {}
    def _load_sentiment(self):
        """Load the sentiment model (cached after first load)."""
        if "sentiment" not in self.models:
            print("🔄 Loading sentiment model...")
            self.models["sentiment"] = pipeline(  # type: ignore[call-overload]
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=-1  # CPU
            )
        return self.models["sentiment"]
    def _load_ner(self):
        """Load the NER model (cached after first load)."""
        if "ner" not in self.models:
            print("🔄 Loading NER model...")
            self.models["ner"] = pipeline(  # type: ignore[call-overload]
                "ner",
                model="dslim/bert-base-NER",
                aggregation_strategy="simple",
                device=-1  # CPU
            )
        return self.models["ner"]
    def _load_embedder(self):
        """Load the sentence-embedding model (cached after first load)."""
        if self.embedder is None:
            print("🔄 Loading embedding model...")
            # Set clean_up_tokenization_spaces explicitly to silence a FutureWarning
            self.embedder = SentenceTransformer(
                'all-MiniLM-L6-v2',
                tokenizer_kwargs={'clean_up_tokenization_spaces': True}
            )
        return self.embedder
    def sentiment_analysis(self, input_data: Dict) -> Dict:
        """Per-sentence and overall sentiment analysis."""
        text = input_data.get("text", "")
        if not text:
            return {"error": "No text provided"}
        model = self._load_sentiment()
        # Naive sentence split on ". " (good enough for short inputs)
        sentences = text.split('. ')
        results = []
        for sentence in sentences:
            if len(sentence.strip()) > 3:
                result = model(sentence[:512])[0]  # type: ignore[misc]
                results.append({
                    "sentence": sentence[:50] + "..." if len(sentence) > 50 else sentence,
                    "sentiment": result["label"],
                    "confidence": float(result["score"])  # numpy type -> Python float
                })
        # Compute overall sentiment (ties fall through to NEGATIVE)
        if results:
            positive_count = sum(1 for r in results if r["sentiment"] == "POSITIVE")
            negative_count = sum(1 for r in results if r["sentiment"] == "NEGATIVE")
            overall = "POSITIVE" if positive_count > negative_count else "NEGATIVE"
            confidence = float(max(r["confidence"] for r in results))  # ensure Python float
        else:
            overall = "NEUTRAL"
            confidence = 0.5
        return {
            "overall_sentiment": overall,
            "confidence": confidence,
            "sentence_analysis": results,
            "summary": {
                "positive_sentences": sum(1 for r in results if r["sentiment"] == "POSITIVE"),
                "negative_sentences": sum(1 for r in results if r["sentiment"] == "NEGATIVE"),
                "total_sentences": len(results)
            }
        }
    def entity_extraction(self, input_data: Dict) -> Dict:
        """Named Entity Recognition."""
        text = input_data.get("text", "")
        if not text:
            return {"error": "No text provided"}
        model = self._load_ner()
        entities = model(text[:512])  # type: ignore[misc]
        # Group entities by type and convert numpy types to plain Python types
        grouped = {}
        serializable_entities = []
        for entity in entities:
            entity_type = entity["entity_group"]
            if entity_type not in grouped:
                grouped[entity_type] = []
            grouped[entity_type].append({
                "word": entity["word"],
                "score": float(entity["score"])  # numpy.float32 -> Python float
            })
            # Keep a JSON-serializable version of the first 10 entities
            if len(serializable_entities) < 10:
                serializable_entities.append({
                    "entity_group": entity["entity_group"],
                    "word": entity["word"],
                    "score": float(entity["score"]),  # numpy.float32 -> Python float
                    "start": int(entity["start"]) if "start" in entity else None,
                    "end": int(entity["end"]) if "end" in entity else None
                })
        return {
            "entities": serializable_entities,  # first 10 entities (JSON-serializable)
            "grouped": grouped,
            "summary": {
                "total_entities": len(entities),
                "entity_types": list(grouped.keys()),
                "most_common_type": max(grouped.keys(), key=lambda k: len(grouped[k])) if grouped else None
            }
        }
    def semantic_similarity(self, input_data: Dict) -> Dict:
        """Semantic similarity between two texts."""
        text1 = input_data.get("text1", "")
        text2 = input_data.get("text2", "")
        if not text1 or not text2:
            return {"error": "Both text1 and text2 are required"}
        embedder = self._load_embedder()
        # Embed both texts in one batch
        embeddings = embedder.encode([text1, text2])  # type: ignore[misc]
        # Compute cosine similarity (lazy import keeps sklearn off the startup path)
        from sklearn.metrics.pairwise import cosine_similarity
        similarity_score = cosine_similarity(
            [embeddings[0]],
            [embeddings[1]]
        )[0][0]
        return {
            "text1": text1[:100] + "..." if len(text1) > 100 else text1,
            "text2": text2[:100] + "..." if len(text2) > 100 else text2,
            "similarity_score": float(similarity_score),
            "similarity_percentage": round(float(similarity_score) * 100, 2)
        }
    def text_embedding(self, input_data: Dict) -> Dict:
        """Convert text to an embedding vector."""
        text = input_data.get("text", "")
        if not text:
            return {"error": "No text provided"}
        embedder = self._load_embedder()
        embedding = embedder.encode(text)  # type: ignore[misc]
        return {
            "text": text[:100] + "..." if len(text) > 100 else text,
            "embedding": embedding.tolist()[:50],  # first 50 dimensions only
            "embedding_dimension": len(embedding),
            "embedding_size_kb": round(len(embedding) * 4 / 1024, 2)  # assumes float32 (4 bytes/dim)
        }
    def smart_cache(self, input_data: Dict) -> Dict:
        """In-memory cache operations and statistics."""
        operation = input_data.get("operation", "stats")
        if operation == "clear":
            size_before = len(self.cache)
            self.cache.clear()
            return {
                "operation": "clear",
                "items_cleared": size_before,
                "cache_size": 0
            }
        elif operation == "stats":
            return {
                "operation": "stats",
                "cached_items": len(self.cache),
                "cache_keys": list(self.cache.keys())[:10],
                "cache_memory_estimate_kb": round(len(str(self.cache)) / 1024, 2)
            }
        elif operation == "set":
            key = input_data.get("key")
            value = input_data.get("value")
            if not key or value is None:
                return {"error": "key and value required for set operation"}
            self.cache[key] = value
            return {
                "operation": "set",
                "key": key,
                "cache_size": len(self.cache)
            }
        elif operation == "get":
            key = input_data.get("key")
            if not key:
                return {"error": "key required for get operation"}
            # Membership test so falsy cached values (0, "", False) still count as found
            found = key in self.cache
            return {
                "operation": "get",
                "key": key,
                "found": found,
                "value": self.cache.get(key)
            }
        return {"error": "Unknown operation"}
# Module-level singleton so importers share one model cache
advanced_tools = AdvancedTools()
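# --- Optional local smoke test (added; not part of the original module) ---
# Run `python advanced_tools.py` to exercise each tool once on illustrative
# sample strings. The first call downloads model weights from the Hub, and
# semantic_similarity additionally requires scikit-learn to be installed.
if __name__ == "__main__":
    import json
    report = advanced_tools.sentiment_analysis({"text": "I love this tool. The setup was confusing."})
    print(json.dumps(report, indent=2))
    print(advanced_tools.semantic_similarity({"text1": "CPU inference", "text2": "running models without a GPU"}))
    print(advanced_tools.smart_cache({"operation": "set", "key": "greeting", "value": "hello"}))
    print(advanced_tools.smart_cache({"operation": "get", "key": "greeting"}))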