# EmojiPredictor — semantic emoji prediction via sentence embeddings
import json

from sentence_transformers import SentenceTransformer, util


class EmojiPredictor:
    """Predict the emoji whose description is semantically closest to input text.

    A SentenceTransformer model embeds every emoji description once at
    construction time; ``predict`` then embeds the query text and picks the
    emoji with the highest cosine similarity.
    """

    def __init__(self, model_path, emoji_data_path):
        """Load the embedding model and pre-compute description vectors.

        Args:
            model_path: Model name or local path for ``SentenceTransformer``.
            emoji_data_path: Path to a JSON file containing a list of objects,
                each with at least ``'description'`` and ``'emoji'`` keys.
        """
        self.model = SentenceTransformer(model_path)
        self.emoji_data = self._load_emoji_data(emoji_data_path)
        # Embeddings are computed once here so predict() only encodes the query.
        self.description_vectors = self._vectorize_descriptions()

    def _load_emoji_data(self, emoji_data_path):
        """Return the parsed emoji JSON list."""
        # Explicit UTF-8: emoji and description text must not depend on the
        # platform's default encoding.
        with open(emoji_data_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _vectorize_descriptions(self):
        """Embed all emoji descriptions in one batched encode call."""
        descriptions = [item['description'] for item in self.emoji_data]
        return self.model.encode(descriptions)

    def predict(self, text):
        """Return the emoji whose description best matches ``text``.

        Args:
            text: Free-form input string to match against emoji descriptions.

        Returns:
            The ``'emoji'`` value of the best-matching entry.

        Raises:
            ValueError: If no emoji data was loaded (nothing to match against).
        """
        if not self.emoji_data:
            raise ValueError("No emoji data loaded; cannot predict.")
        text_vector = self.model.encode([text])[0]
        # util.cos_sim accepts 1-D inputs (it unsqueezes them to 2-D), so no
        # manual reshape or hand-rolled dot-product/norm math is needed.
        similarities = util.cos_sim(self.description_vectors, text_vector).flatten()
        most_similar_index = similarities.argmax()
        return self.emoji_data[most_similar_index]['emoji']