# SorboBot / app.py
# Hugging Face file-viewer residue: bgregorutti's picture · Update app.py · 97c0116 · raw · history blame · 2.21 kB
import streamlit as st
from transformers import pipeline
import sqlite3
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Sentiment-analysis pipeline built at start-up.
# NOTE(review): nothing in the visible part of this script reads `pipe`;
# confirm it is still needed before keeping the model load.
pipe = pipeline("sentiment-analysis")

# Introductory message, used as the label of the request box below.
intro = """
Welcome to SorboBot, a Hugging Face Space designed to revolutionize the way you find published articles.
Powered by a full export from ScanR and Hal at Sorbonne University, SorboBot utilizes advanced language model technology to provide you with a list of published articles based on your prompt
Work in progress
Write your request:
"""

# `text` holds whatever the text area returns for the current run.
text = st.text_area(intro)
def _extract_keywords(request, top_n=5, n_gram_range=(2, 2), stop_words="english"):
    """Return up to ``top_n`` n-grams of ``request`` most similar to the request.

    Candidate phrases are the n-grams found by ``CountVectorizer``; they are
    ranked by cosine similarity between their sentence embedding and the
    embedding of the whole request.

    Args:
        request: Text typed by the user.
        top_n: Maximum number of phrases to return.
        n_gram_range: ``ngram_range`` forwarded to ``CountVectorizer``.
        stop_words: ``stop_words`` forwarded to ``CountVectorizer``.

    Returns:
        A list of candidate phrases, ordered by increasing similarity.
        Empty when the request yields no candidate phrase.
    """
    try:
        vectorizer = CountVectorizer(
            ngram_range=n_gram_range, stop_words=stop_words
        ).fit([request])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no n-gram
        # survives tokenisation and stop-word removal, e.g. a one-word request.
        return []

    candidates = vectorizer.get_feature_names_out()

    # NOTE(review): the model is re-created on every call; consider caching it
    # with the Streamlit caching API available in the deployed version.
    model = SentenceTransformer('distilbert-base-nli-mean-tokens')
    doc_embedding = model.encode([request])
    candidate_embeddings = model.encode(candidates)

    distances = cosine_similarity(doc_embedding, candidate_embeddings)
    # argsort is ascending, so the last ``top_n`` indices are the closest ones.
    return [candidates[index] for index in distances.argsort()[0][-top_n:]]


def _search_titles(keywords, limit=3, db_path='SU_CSV.db'):
    """Return at most ``limit`` rows whose ``keyword_s`` matches any keyword.

    Args:
        keywords: Phrases to look for (substring match through ``LIKE``).
        limit: Maximum number of rows to fetch.
        db_path: Path of the SQLite database file.

    Returns:
        A list of one-element rows ``(title_s,)``; empty when ``keywords`` is
        empty or nothing matches.

    Raises:
        sqlite3.Error: If the database cannot be queried.
    """
    if not keywords:
        # An empty OR-list would produce an invalid "WHERE" clause.
        return []

    # Build the SQL query: one bound "LIKE ?" placeholder per keyword.
    where_clause = ' OR '.join(['keyword_s LIKE ?'] * len(keywords))
    query = f"SELECT title_s FROM BDD_Provisoire_SU WHERE {where_clause}"
    params = ['%' + keyword + '%' for keyword in keywords]

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(query, params)
        # Only the first ``limit`` rows are displayed, so do not load the rest.
        return cursor.fetchmany(limit)
    finally:
        # Release the connection even when the query fails.
        conn.close()


# Handle a request once the user has typed something.
if text:
    keywords = _extract_keywords(text)
    results = _search_titles(keywords)

    # Display the titles of the articles found.
    if results:
        st.write("Titles of articles corresponding to your search:")
        for row in results:
            st.json(row[0])
    else:
        st.write("No article found in the database\n\n")
        st.json({})

    generator = pipeline("text-generation", model="gpt2")  # to modify for another model
    # NOTE(review): max_length presumably counts the prompt tokens as well, so
    # a long request may leave little room for generated text - confirm.
    txt = generator(
        text,
        max_length=150,
        num_return_sequences=1,
    )[0]["generated_text"]
    st.write("Model output")
    st.write(txt)