Spaces:
Build error
Build error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -20,20 +20,20 @@ stsb_dataset = load_dataset('stsb_multi_mt', 'en')
|
|
| 20 |
stsb_train = pd.DataFrame(stsb_dataset['train'])
|
| 21 |
stsb_test = pd.DataFrame(stsb_dataset['test'])
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
|
| 38 |
|
| 39 |
# let's read the csv file
|
|
|
|
| 20 |
stsb_train = pd.DataFrame(stsb_dataset['train'])
|
| 21 |
stsb_test = pd.DataFrame(stsb_dataset['test'])
|
| 22 |
|
| 23 |
+
# let's create helper functions
|
| 24 |
+
# Load the spaCy "en_core_web_sm" pipeline once at module import; text_processing()
# calls this shared object to tokenize and lemmatize each sentence.
nlp = spacy.load("en_core_web_sm")
|
| 25 |
|
| 26 |
|
| 27 |
+
def text_processing(sentence):
    """Return the lowercased lemmas of the content tokens in *sentence*.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    that are not purely alphabetic, or that spaCy flags as stop words, are
    dropped; the lemma of every remaining token is lowercased and collected
    in document order.
    """
    lemmas = []
    for tok in nlp(sentence):
        # Skip non-alphabetic tokens and stop words.
        if not tok.is_alpha or tok.is_stop:
            continue
        lemmas.append(tok.lemma_.lower())
    return lemmas
|
| 32 |
|
| 33 |
|
| 34 |
+
def cos_sim(sentence1_emb, sentence2_emb):
    """Return the diagonal of the cosine-similarity matrix of the two inputs.

    ``cosine_similarity`` produces the full pairwise similarity matrix between
    ``sentence1_emb`` and ``sentence2_emb``; only its main diagonal is kept,
    i.e. the score of each entry of the first input against the entry at the
    same position in the second.
    """
    pairwise_scores = cosine_similarity(sentence1_emb, sentence2_emb)
    # Off-diagonal entries compare items at different positions and are discarded.
    return np.diag(pairwise_scores)
|
| 37 |
|
| 38 |
|
| 39 |
# let's read the csv file
|