Cordobian committed on
Commit
bd4dab3
·
verified ·
1 Parent(s): f4575b9

Update advanced_tools.py

Browse files
Files changed (1) hide show
  1. advanced_tools.py +110 -160
advanced_tools.py CHANGED
@@ -1,37 +1,11 @@
1
  # advanced_tools.py
2
  import json
3
  import time
4
- from typing import Dict, List, Any
 
 
5
  import numpy as np
6
-
7
- # Safe imports with fallbacks
8
- try:
9
- from transformers import pipeline as transformers_pipeline
10
- HAS_TRANSFORMERS = True
11
- except ImportError:
12
- HAS_TRANSFORMERS = False
13
- transformers_pipeline = None
14
-
15
- try:
16
- import torch
17
- HAS_TORCH = True
18
- except ImportError:
19
- HAS_TORCH = False
20
- torch = None
21
-
22
- try:
23
- from sentence_transformers import SentenceTransformer
24
- HAS_SENTENCE_TRANSFORMERS = True
25
- except ImportError:
26
- HAS_SENTENCE_TRANSFORMERS = False
27
- SentenceTransformer = None
28
-
29
- try:
30
- from sklearn.metrics.pairwise import cosine_similarity
31
- HAS_SKLEARN = True
32
- except ImportError:
33
- HAS_SKLEARN = False
34
- cosine_similarity = None
35
 
36
  class AdvancedTools:
37
  def __init__(self):
@@ -43,49 +17,35 @@ class AdvancedTools:
43
  def _load_sentiment(self):
44
  """Sentiment model'i yükle"""
45
  if "sentiment" not in self.models:
46
- if not HAS_TRANSFORMERS:
47
- raise ImportError("transformers library not available")
48
- print("[LOAD] Loading sentiment model...")
49
- try:
50
- self.models["sentiment"] = transformers_pipeline(
51
- "sentiment-analysis",
52
- model="distilbert-base-uncased-finetuned-sst-2-english",
53
- device=-1 # CPU
54
- )
55
- except Exception as e:
56
- print(f"[ERROR] Failed to load sentiment model: {e}")
57
- raise
58
  return self.models["sentiment"]
59
 
60
  def _load_ner(self):
61
  """NER model'i yükle"""
62
  if "ner" not in self.models:
63
- if not HAS_TRANSFORMERS:
64
- raise ImportError("transformers library not available")
65
- print("[LOAD] Loading NER model...")
66
- try:
67
- self.models["ner"] = transformers_pipeline(
68
- "ner",
69
- model="dslim/bert-base-NER",
70
- aggregation_strategy="simple",
71
- device=-1
72
- )
73
- except Exception as e:
74
- print(f"[ERROR] Failed to load NER model: {e}")
75
- raise
76
  return self.models["ner"]
77
 
78
  def _load_embedder(self):
79
  """Embedding model'i yükle"""
80
  if self.embedder is None:
81
- if not HAS_SENTENCE_TRANSFORMERS:
82
- raise ImportError("sentence-transformers library not available")
83
- print("[LOAD] Loading embedding model...")
84
- try:
85
- self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
86
- except Exception as e:
87
- print(f"[ERROR] Failed to load embedding model: {e}")
88
- raise
89
  return self.embedder
90
 
91
  def sentiment_analysis(self, input_data: Dict) -> Dict:
@@ -94,48 +54,42 @@ class AdvancedTools:
94
  if not text:
95
  return {"error": "No text provided"}
96
 
97
- if not HAS_TRANSFORMERS:
98
- return {"error": "transformers library not available"}
99
-
100
- try:
101
- model = self._load_sentiment()
102
 
103
- # Metni cümlelere böl
104
- sentences = text.split('. ')
105
- results = []
106
 
107
- for sentence in sentences:
108
- if len(sentence.strip()) > 3:
109
- result = model(sentence[:512])[0]
110
- results.append({
111
- "sentence": sentence[:50] + "..." if len(sentence) > 50 else sentence,
112
- "sentiment": result["label"],
113
- "confidence": result["score"]
114
- })
115
-
116
- # Genel duygu hesapla
117
- if results:
118
- positive_count = sum(1 for r in results if r["sentiment"] == "POSITIVE")
119
- negative_count = sum(1 for r in results if r["sentiment"] == "NEGATIVE")
120
-
121
- overall = "POSITIVE" if positive_count > negative_count else "NEGATIVE"
122
- confidence = max(r["confidence"] for r in results)
123
- else:
124
- overall = "NEUTRAL"
125
- confidence = 0.5
126
 
127
- return {
128
- "overall_sentiment": overall,
129
- "confidence": confidence,
130
- "sentence_analysis": results,
131
- "summary": {
132
- "positive_sentences": sum(1 for r in results if r["sentiment"] == "POSITIVE"),
133
- "negative_sentences": sum(1 for r in results if r["sentiment"] == "NEGATIVE"),
134
- "total_sentences": len(results)
135
- }
 
 
 
 
 
 
 
 
 
 
136
  }
137
- except Exception as e:
138
- return {"error": f"Sentiment analysis failed: {str(e)}"}
139
 
140
  def entity_extraction(self, input_data: Dict) -> Dict:
141
  """Named Entity Recognition"""
@@ -143,35 +97,41 @@ class AdvancedTools:
143
  if not text:
144
  return {"error": "No text provided"}
145
 
146
- if not HAS_TRANSFORMERS:
147
- return {"error": "transformers library not available"}
148
-
149
- try:
150
- model = self._load_ner()
151
- entities = model(text[:512])
152
-
153
- # Entity'leri grupla
154
- grouped = {}
155
- for entity in entities:
156
- entity_type = entity["entity_group"]
157
- if entity_type not in grouped:
158
- grouped[entity_type] = []
159
- grouped[entity_type].append({
 
 
 
 
 
 
160
  "word": entity["word"],
161
- "score": entity["score"]
 
 
162
  })
163
 
164
- return {
165
- "entities": entities[:10], # İlk 10 entity
166
- "grouped": grouped,
167
- "summary": {
168
- "total_entities": len(entities),
169
- "entity_types": list(grouped.keys()),
170
- "most_common_type": max(grouped.keys(), key=lambda k: len(grouped[k])) if grouped else None
171
- }
172
  }
173
- except Exception as e:
174
- return {"error": f"Entity extraction failed: {str(e)}"}
175
 
176
  def semantic_similarity(self, input_data: Dict) -> Dict:
177
  """İki metin arasındaki benzerlik"""
@@ -181,28 +141,24 @@ class AdvancedTools:
181
  if not text1 or not text2:
182
  return {"error": "Both text1 and text2 are required"}
183
 
184
- if not HAS_SKLEARN:
185
- return {"error": "scikit-learn not available for similarity calculation"}
186
 
187
- try:
188
- embedder = self._load_embedder()
189
- # Embed metinleri
190
- embeddings = embedder.encode([text1, text2])
191
 
192
- # Cosine similarity hesapla
193
- similarity_score = cosine_similarity(
194
- [embeddings[0]],
195
- [embeddings[1]]
196
- )[0][0]
 
197
 
198
- return {
199
- "text1": text1[:100] + "..." if len(text1) > 100 else text1,
200
- "text2": text2[:100] + "..." if len(text2) > 100 else text2,
201
- "similarity_score": float(similarity_score),
202
- "similarity_percentage": round(float(similarity_score) * 100, 2)
203
- }
204
- except Exception as e:
205
- return {"error": f"Similarity calculation failed: {str(e)}"}
206
 
207
  def text_embedding(self, input_data: Dict) -> Dict:
208
  """Metni vector'e çevir (embedding)"""
@@ -210,21 +166,15 @@ class AdvancedTools:
210
  if not text:
211
  return {"error": "No text provided"}
212
 
213
- if not HAS_SENTENCE_TRANSFORMERS:
214
- return {"error": "sentence-transformers not available for embeddings"}
215
-
216
- try:
217
- embedder = self._load_embedder()
218
- embedding = embedder.encode(text)
219
 
220
- return {
221
- "text": text[:100] + "..." if len(text) > 100 else text,
222
- "embedding": embedding.tolist()[:50], # İlk 50 dimension
223
- "embedding_dimension": len(embedding),
224
- "embedding_size_kb": round(len(embedding) * 4 / 1024, 2)
225
- }
226
- except Exception as e:
227
- return {"error": f"Embedding failed: {str(e)}"}
228
 
229
  def smart_cache(self, input_data: Dict) -> Dict:
230
  """Caching ve cache stats"""
@@ -275,4 +225,4 @@ class AdvancedTools:
275
 
276
 
277
  # Global instance oluştur
278
- advanced_tools = AdvancedTools()
 
1
  # advanced_tools.py
2
  import json
3
  import time
4
+ from typing import Dict, List, Any, Optional
5
+ from transformers import pipeline # type: ignore[import]
6
+ import torch
7
  import numpy as np
8
+ from sentence_transformers import SentenceTransformer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class AdvancedTools:
11
  def __init__(self):
 
17
  def _load_sentiment(self):
18
  """Sentiment model'i yükle"""
19
  if "sentiment" not in self.models:
20
+ print("🔄 Loading sentiment model...")
21
+ self.models["sentiment"] = pipeline( # type: ignore[call-overload]
22
+ "sentiment-analysis",
23
+ model="distilbert-base-uncased-finetuned-sst-2-english",
24
+ device=-1 # CPU
25
+ )
 
 
 
 
 
 
26
  return self.models["sentiment"]
27
 
28
  def _load_ner(self):
29
  """NER model'i yükle"""
30
  if "ner" not in self.models:
31
+ print("🔄 Loading NER model...")
32
+ self.models["ner"] = pipeline( # type: ignore[call-overload]
33
+ "ner",
34
+ model="dslim/bert-base-NER",
35
+ aggregation_strategy="simple",
36
+ device=-1
37
+ )
 
 
 
 
 
 
38
  return self.models["ner"]
39
 
40
  def _load_embedder(self):
41
  """Embedding model'i yükle"""
42
  if self.embedder is None:
43
+ print("🔄 Loading embedding model...")
44
+ # clean_up_tokenization_spaces parametresini açıkça belirt (future warning için)
45
+ self.embedder = SentenceTransformer(
46
+ 'all-MiniLM-L6-v2',
47
+ tokenizer_kwargs={'clean_up_tokenization_spaces': True}
48
+ )
 
 
49
  return self.embedder
50
 
51
  def sentiment_analysis(self, input_data: Dict) -> Dict:
 
54
  if not text:
55
  return {"error": "No text provided"}
56
 
57
+ model = self._load_sentiment()
 
 
 
 
58
 
59
+ # Metni cümlelere böl
60
+ sentences = text.split('. ')
61
+ results = []
62
 
63
+ for sentence in sentences:
64
+ if len(sentence.strip()) > 3:
65
+ result = model(sentence[:512])[0] # type: ignore[misc]
66
+ results.append({
67
+ "sentence": sentence[:50] + "..." if len(sentence) > 50 else sentence,
68
+ "sentiment": result["label"],
69
+ "confidence": float(result["score"]) # numpy type -> Python float
70
+ })
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ # Genel duygu hesapla
73
+ if results:
74
+ positive_count = sum(1 for r in results if r["sentiment"] == "POSITIVE")
75
+ negative_count = sum(1 for r in results if r["sentiment"] == "NEGATIVE")
76
+
77
+ overall = "POSITIVE" if positive_count > negative_count else "NEGATIVE"
78
+ confidence = float(max(r["confidence"] for r in results)) # Ensure Python float
79
+ else:
80
+ overall = "NEUTRAL"
81
+ confidence = 0.5
82
+
83
+ return {
84
+ "overall_sentiment": overall,
85
+ "confidence": confidence,
86
+ "sentence_analysis": results,
87
+ "summary": {
88
+ "positive_sentences": sum(1 for r in results if r["sentiment"] == "POSITIVE"),
89
+ "negative_sentences": sum(1 for r in results if r["sentiment"] == "NEGATIVE"),
90
+ "total_sentences": len(results)
91
  }
92
+ }
 
93
 
94
  def entity_extraction(self, input_data: Dict) -> Dict:
95
  """Named Entity Recognition"""
 
97
  if not text:
98
  return {"error": "No text provided"}
99
 
100
+ model = self._load_ner()
101
+ entities = model(text[:512]) # type: ignore[misc]
102
+
103
+ # Entity'leri grupla ve numpy tiplerini Python tiplerine çevir
104
+ grouped = {}
105
+ serializable_entities = []
106
+
107
+ for entity in entities:
108
+ entity_type = entity["entity_group"]
109
+ if entity_type not in grouped:
110
+ grouped[entity_type] = []
111
+ grouped[entity_type].append({
112
+ "word": entity["word"],
113
+ "score": float(entity["score"]) # numpy.float32 -> Python float
114
+ })
115
+
116
+ # İlk 10 entity için serileştirilebilir versiyon
117
+ if len(serializable_entities) < 10:
118
+ serializable_entities.append({
119
+ "entity_group": entity["entity_group"],
120
  "word": entity["word"],
121
+ "score": float(entity["score"]), # numpy.float32 -> Python float
122
+ "start": int(entity["start"]) if "start" in entity else None,
123
+ "end": int(entity["end"]) if "end" in entity else None
124
  })
125
 
126
+ return {
127
+ "entities": serializable_entities, # İlk 10 entity (serileştirilebilir)
128
+ "grouped": grouped,
129
+ "summary": {
130
+ "total_entities": len(entities),
131
+ "entity_types": list(grouped.keys()),
132
+ "most_common_type": max(grouped.keys(), key=lambda k: len(grouped[k])) if grouped else None
 
133
  }
134
+ }
 
135
 
136
  def semantic_similarity(self, input_data: Dict) -> Dict:
137
  """İki metin arasındaki benzerlik"""
 
141
  if not text1 or not text2:
142
  return {"error": "Both text1 and text2 are required"}
143
 
144
+ embedder = self._load_embedder()
 
145
 
146
+ # Embed metinleri
147
+ embeddings = embedder.encode([text1, text2]) # type: ignore[misc]
 
 
148
 
149
+ # Cosine similarity hesapla
150
+ from sklearn.metrics.pairwise import cosine_similarity
151
+ similarity_score = cosine_similarity(
152
+ [embeddings[0]],
153
+ [embeddings[1]]
154
+ )[0][0]
155
 
156
+ return {
157
+ "text1": text1[:100] + "..." if len(text1) > 100 else text1,
158
+ "text2": text2[:100] + "..." if len(text2) > 100 else text2,
159
+ "similarity_score": float(similarity_score),
160
+ "similarity_percentage": round(float(similarity_score) * 100, 2)
161
+ }
 
 
162
 
163
  def text_embedding(self, input_data: Dict) -> Dict:
164
  """Metni vector'e çevir (embedding)"""
 
166
  if not text:
167
  return {"error": "No text provided"}
168
 
169
+ embedder = self._load_embedder()
170
+ embedding = embedder.encode(text) # type: ignore[misc]
 
 
 
 
171
 
172
+ return {
173
+ "text": text[:100] + "..." if len(text) > 100 else text,
174
+ "embedding": embedding.tolist()[:50], # İlk 50 dimension
175
+ "embedding_dimension": len(embedding),
176
+ "embedding_size_kb": round(len(embedding) * 4 / 1024, 2)
177
+ }
 
 
178
 
179
  def smart_cache(self, input_data: Dict) -> Dict:
180
  """Caching ve cache stats"""
 
225
 
226
 
227
  # Global instance oluştur
228
+ advanced_tools = AdvancedTools()