# Hugging Face Spaces page banner (captured with the source, not app code):
# "Spaces: Sleeping / Sleeping"
import sqlite3

import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
# SorboBot: extract keyword bigrams from a user prompt, look up matching
# article titles in a local SQLite export of ScanR/HAL data, then show a
# GPT-2 continuation of the prompt.
#
# NOTE(review): the original also built pipeline('sentiment-analysis') at
# import time but never used its result anywhere — that dead (and slow)
# model load has been removed.

WELCOME_PROMPT = """
Welcome to SorboBot, a Hugging Face Space designed to revolutionize the way you find published articles.
Powered by a full export from ScanR and Hal at Sorbonne University, SorboBot utilizes advanced language model technology to provide you with a list of published articles based on your prompt
Work in progress
Write your request:
"""


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer('distilbert-base-nli-mean-tokens')


@st.cache_resource
def _load_generator():
    """Load the text-generation pipeline once and reuse it across reruns."""
    return pipeline("text-generation", model="gpt2")  # to modify for another model


def _extract_keywords(document, top_n=5):
    """Return up to ``top_n`` bigram keywords of ``document``.

    Candidates are the English-stop-word-filtered bigrams of the text,
    ranked by cosine similarity between the document embedding and each
    candidate embedding.  Returns an empty list when the text yields no
    bigram candidates (e.g. a one-word prompt), instead of crashing.
    """
    try:
        count = CountVectorizer(ngram_range=(2, 2), stop_words="english").fit([document])
    except ValueError:
        # CountVectorizer raises "empty vocabulary" when no valid bigrams exist.
        return []
    candidates = count.get_feature_names_out()
    model = _load_embedder()
    doc_embedding = model.encode([document])
    candidate_embeddings = model.encode(candidates)
    distances = cosine_similarity(doc_embedding, candidate_embeddings)
    # argsort is ascending, so the last top_n indices are the best matches.
    return [candidates[index] for index in distances.argsort()[0][-top_n:]]


def _search_titles(keywords, db_path='SU_CSV.db'):
    """Return rows of article titles whose keyword field matches any keyword.

    An empty keyword list returns no rows immediately — this also avoids
    building a malformed ``... WHERE`` clause with zero conditions.
    """
    if not keywords:
        return []
    # Parameterized query: user-derived keywords never reach the SQL text.
    query = (
        "SELECT title_s FROM BDD_Provisoire_SU WHERE "
        + " OR ".join(["keyword_s LIKE ?"] * len(keywords))
    )
    params = ['%' + keyword + '%' for keyword in keywords]
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(query, params)
        return cursor.fetchall()
    finally:
        # Close the connection even if the query raises.
        conn.close()


text = st.text_area(WELCOME_PROMPT)
if text:
    keywords = _extract_keywords(text)
    resultats = _search_titles(keywords)
    # Display the titles of the articles that were found.
    if resultats:
        st.write("Titles of articles corresponding to your search:")
        for row in resultats[:3]:
            st.json(row[0])
    else:
        st.write("No article found in the database\n\n")
        st.json({})
    txt = _load_generator()(
        text,
        max_length=150,
        num_return_sequences=1,
    )[0]["generated_text"]
    st.write("Model output")
    st.write(txt)