File size: 2,210 Bytes
5dacc4a
 
8540184
f058a76
7e62e79
80d5eef
 
5dacc4a
213df71
 
 
 
 
 
 
 
 
 
 
f058a76
 
cac1fed
f058a76
 
 
 
 
 
 
 
 
 
 
8efdfea
 
 
 
 
 
 
 
 
 
 
 
 
 
213df71
648283c
3f41bd5
8efdfea
97c0116
 
8efdfea
 
139459b
bbfa082
213df71
7e25f90
50281b0
e292ca9
 
24e14d1
e292ca9
24e14d1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
from transformers import pipeline
import sqlite3
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# NOTE(review): removed `pipe = pipeline('sentiment-analysis')` — it downloaded
# and loaded a full sentiment model at startup but `pipe` was never used
# anywhere in this file.

# Intro text; it doubles as the label of the text area below. The variable is
# then rebound to the user's input, which the rest of the script consumes.
text = """
Welcome to SorboBot, a Hugging Face Space designed to revolutionize the way you find published articles.

Powered by a full export from ScanR and Hal at Sorbonne University, SorboBot utilizes advanced language model technology to provide you with a list of published articles based on your prompt

Work in progress

Write your request:
"""
text = st.text_area(text)


if text:
    # ---- Keyword extraction (KeyBERT-style) --------------------------------
    n_gram_range = (2, 2)   # candidate phrases are bigrams
    stop_words = "english"

    # Extract candidate words/phrases from the user's prompt.
    try:
        count = CountVectorizer(ngram_range=n_gram_range, stop_words=stop_words).fit([text])
    except ValueError:
        # CountVectorizer raises ValueError when the prompt yields no valid
        # bigrams (e.g. only stop words); fail gracefully instead of crashing.
        st.write("Could not extract keywords from your request, please add more detail.")
        st.stop()
    candidates = count.get_feature_names_out()

    # Embed the whole prompt and each candidate phrase, then keep the top_n
    # candidates whose embeddings are closest (cosine similarity) to the prompt.
    model = SentenceTransformer('distilbert-base-nli-mean-tokens')
    doc_embedding = model.encode([text])
    candidate_embeddings = model.encode(candidates)
    top_n = 5
    distances = cosine_similarity(doc_embedding, candidate_embeddings)
    keywords = [candidates[index] for index in distances.argsort()[0][-top_n:]]

    # ---- Database lookup ---------------------------------------------------
    conn = sqlite3.connect('SU_CSV.db')
    try:
        cursor = conn.cursor()
        mots_cles_recherches = keywords
        # Build the SQL query: one parameterized LIKE clause per keyword,
        # OR-ed together. `keywords` is non-empty here (candidates is
        # non-empty after the guard above), so the WHERE clause is valid.
        query = f"SELECT title_s FROM BDD_Provisoire_SU WHERE {' OR '.join(['keyword_s LIKE ?'] * len(mots_cles_recherches))}"
        params = ['%' + mot + '%' for mot in mots_cles_recherches]

        cursor.execute(query, params)
        resultats = cursor.fetchall()
    finally:
        # Close the connection even when the query fails (it was leaked on
        # error before).
        conn.close()

    # Display the titles of the articles found (at most 3).
    if resultats:
        st.write("Titles of articles corresponding to your search:")
        for row in resultats[:3]:
            st.json(row[0])
    else:
        st.write("No article found in the database\n\n")
        st.json({})

    # ---- Text generation ---------------------------------------------------
    generator = pipeline("text-generation", model="gpt2") # to modify for another model
    txt = generator(
        text,
        max_length=150,
        num_return_sequences=1,
    )[0]["generated_text"]

    st.write("Model output")
    st.write(txt)