# Hugging Face Spaces page banner (captured with the source, not app code):
# "Spaces: Sleeping / Sleeping"
import sqlite3

import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
# SorboBot: extract keyword bigrams from a user prompt, look up matching
# article titles in a local SQLite export of ScanR/HAL data, then show a
# GPT-2 continuation of the prompt.
#
# NOTE(review): the original also built pipeline('sentiment-analysis') at
# import time but never used its result anywhere — that dead (and slow)
# model load has been removed.

WELCOME_PROMPT = """
Welcome to SorboBot, a Hugging Face Space designed to revolutionize the way you find published articles.
Powered by a full export from ScanR and Hal at Sorbonne University, SorboBot utilizes advanced language model technology to provide you with a list of published articles based on your prompt
Work in progress
Write your request:
"""


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer('distilbert-base-nli-mean-tokens')


@st.cache_resource
def _load_generator():
    """Load the text-generation pipeline once and reuse it across reruns."""
    return pipeline("text-generation", model="gpt2")  # to modify for another model


def _extract_keywords(document, top_n=5):
    """Return up to ``top_n`` bigram keywords of ``document``.

    Candidates are the English-stop-word-filtered bigrams of the text,
    ranked by cosine similarity between the document embedding and each
    candidate embedding.  Returns an empty list when the text yields no
    bigram candidates (e.g. a one-word prompt), instead of crashing.
    """
    try:
        count = CountVectorizer(ngram_range=(2, 2), stop_words="english").fit([document])
    except ValueError:
        # CountVectorizer raises "empty vocabulary" when no valid bigrams exist.
        return []
    candidates = count.get_feature_names_out()
    model = _load_embedder()
    doc_embedding = model.encode([document])
    candidate_embeddings = model.encode(candidates)
    distances = cosine_similarity(doc_embedding, candidate_embeddings)
    # argsort is ascending, so the last top_n indices are the best matches.
    return [candidates[index] for index in distances.argsort()[0][-top_n:]]


def _search_titles(keywords, db_path='SU_CSV.db'):
    """Return rows of article titles whose keyword field matches any keyword.

    An empty keyword list returns no rows immediately — this also avoids
    building a malformed ``... WHERE`` clause with zero conditions.
    """
    if not keywords:
        return []
    # Parameterized query: user-derived keywords never reach the SQL text.
    query = (
        "SELECT title_s FROM BDD_Provisoire_SU WHERE "
        + " OR ".join(["keyword_s LIKE ?"] * len(keywords))
    )
    params = ['%' + keyword + '%' for keyword in keywords]
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(query, params)
        return cursor.fetchall()
    finally:
        # Close the connection even if the query raises.
        conn.close()


text = st.text_area(WELCOME_PROMPT)
if text:
    keywords = _extract_keywords(text)
    resultats = _search_titles(keywords)
    # Display the titles of the articles that were found.
    if resultats:
        st.write("Titles of articles corresponding to your search:")
        for row in resultats[:3]:
            st.json(row[0])
    else:
        st.write("No article found in the database\n\n")
        st.json({})
    txt = _load_generator()(
        text,
        max_length=150,
        num_return_sequences=1,
    )[0]["generated_text"]
    st.write("Model output")
    st.write(txt)