- app.py +0 -0
- emojis.json +0 -0
- model.py +38 -0
- requirements.txt +2 -0
app.py
ADDED
File without changes
emojis.json
ADDED
The diff for this file is too large to render. See raw diff.
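Although the emojis.json diff is not rendered here, model.py (below) loads the file as a JSON array and reads each entry's 'description' and 'emoji' fields, so the data presumably has roughly the following shape. This is an illustrative Python literal, not the file's actual contents:

# Illustrative only: structure inferred from how model.py accesses the data
# (item['description'] and item['emoji']); the real entries are not shown in this diff.
example_emoji_data = [
    {"description": "smiling face with smiling eyes", "emoji": "😊"},
    {"description": "slice of pizza", "emoji": "🍕"},
]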
model.py
ADDED
@@ -0,0 +1,38 @@
+import json
+from sentence_transformers import SentenceTransformer, util
+
+class EmojiPredictor:
+    def __init__(self, model_path, emoji_data_path):
+        self.model = SentenceTransformer(model_path)
+        self.emoji_data = self._load_emoji_data(emoji_data_path)
+        self.description_vectors = self._vectorize_descriptions()
+
+    def _load_emoji_data(self, emoji_data_path):
+        with open(emoji_data_path, 'r') as f:
+            return json.load(f)
+
+    def _vectorize_descriptions(self):
+        # Get the sentence embedding for each description
+        descriptions = [item['description'] for item in self.emoji_data]
+        return self.model.encode(descriptions)
+
+    def predict(self, text):
+        # Get the sentence embedding for the input text
+        text_vector = self.model.encode([text])[0]
+
+        from sentence_transformers import util
+
+        # Reshape vectors for cosine similarity calculation
+        text_vector_reshaped = text_vector.reshape((1, -1))
+
+        # Calculate cosine similarity using sentence_transformers.util.cos_sim
+        similarities = util.cos_sim(self.description_vectors, text_vector_reshaped).flatten()
+
+        # Commented out: Manual cosine similarity calculation
+        # dot_products = np.dot(self.description_vectors, text_vector_reshaped.T).flatten()
+        # norms = np.linalg.norm(self.description_vectors, axis=1) * np.linalg.norm(text_vector_reshaped)
+        # similarities = np.divide(dot_products, norms, out=np.zeros_like(dot_products), where=norms!=0)
+
+        # Find the index of the most similar description
+        most_similar_index = similarities.argmax()
+        return self.emoji_data[most_similar_index]['emoji']
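As a rough usage sketch (not part of the commit; the model name and file path below are assumptions), the class added above could be exercised like this:

# Usage sketch only; "all-MiniLM-L6-v2" and the path are assumed, not taken from this commit.
from model import EmojiPredictor

predictor = EmojiPredictor(
    model_path="all-MiniLM-L6-v2",   # any sentence-transformers model id would do here
    emoji_data_path="emojis.json",   # the data file added in this commit
)
print(predictor.predict("I could really go for a slice right now"))
# -> the emoji whose description embedding is most similar to the input text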
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+gradio
+sentence-transformers
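requirements.txt pulls in gradio, but app.py is added empty in this commit. Purely as a hedged sketch (not the author's app.py, and the model id is again an assumption), the predictor could be wrapped in a minimal Gradio interface along these lines:

# Hypothetical wiring of EmojiPredictor into Gradio; app.py in this commit is empty,
# and the model id below is an assumption.
import gradio as gr
from model import EmojiPredictor

predictor = EmojiPredictor("all-MiniLM-L6-v2", "emojis.json")

demo = gr.Interface(
    fn=predictor.predict,   # maps input text to the closest-matching emoji
    inputs=gr.Textbox(label="Describe a feeling or thing"),
    outputs=gr.Textbox(label="Suggested emoji"),
)

if __name__ == "__main__":
    demo.launch()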