Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,78 +1,78 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
from langchain_chroma import Chroma
|
| 3 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
-
from sentence_transformers import CrossEncoder
|
| 5 |
-
import gradio as gr
|
| 6 |
-
import torch
|
| 7 |
-
|
| 8 |
-
# 🔹 Paths
|
| 9 |
-
CHROMA_DIR =
|
| 10 |
-
CSV_PATH =
|
| 11 |
-
PLACEHOLDER_IMAGE =
|
| 12 |
-
|
| 13 |
-
# 🔹 Load CSV
|
| 14 |
-
movies = pd.read_csv(CSV_PATH, encoding="utf-8")
|
| 15 |
-
movies["doc_id"] = movies.index
|
| 16 |
-
|
| 17 |
-
# 🔹 CUDA check
|
| 18 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 19 |
-
|
| 20 |
-
# 🔹 Load embeddings
|
| 21 |
-
embeddings = HuggingFaceEmbeddings(
|
| 22 |
-
model_name="BAAI/bge-m3",
|
| 23 |
-
model_kwargs = {"device": device},
|
| 24 |
-
encode_kwargs={"normalize_embeddings": True}
|
| 25 |
-
)
|
| 26 |
-
|
| 27 |
-
# 🔹 Load Chroma index
|
| 28 |
-
db_movies = Chroma(
|
| 29 |
-
persist_directory=CHROMA_DIR,
|
| 30 |
-
embedding_function=embeddings
|
| 31 |
-
)
|
| 32 |
-
|
| 33 |
-
# 🔹 Load cross-encoder
|
| 34 |
-
reranker = CrossEncoder("cross-encoder/stsb-roberta-large", device=device)
|
| 35 |
-
|
| 36 |
-
# 🔹 Function for semantic recommendations
|
| 37 |
-
def retrieve_semantic_recommendations(query: str, top_k: int = 10):
|
| 38 |
-
recs = db_movies.similarity_search(query, k=50)
|
| 39 |
-
pairs = [(query, rec.page_content) for rec in recs]
|
| 40 |
-
scores = reranker.predict(pairs, batch_size=8)
|
| 41 |
-
|
| 42 |
-
scores_dict = {}
|
| 43 |
-
for rec, score in zip(recs, scores):
|
| 44 |
-
doc_id = rec.metadata["doc_id"]
|
| 45 |
-
if doc_id not in scores_dict or score > scores_dict[doc_id]["score"]:
|
| 46 |
-
scores_dict[doc_id] = {"score": score, "rec": rec}
|
| 47 |
-
|
| 48 |
-
unique_top_recs = sorted(scores_dict.values(), key=lambda x: x["score"], reverse=True)[:top_k]
|
| 49 |
-
|
| 50 |
-
gallery = []
|
| 51 |
-
for item in unique_top_recs:
|
| 52 |
-
rec = item["rec"]
|
| 53 |
-
metadata = rec.metadata
|
| 54 |
-
|
| 55 |
-
# Poster aus CSV anhand doc_id
|
| 56 |
-
movie_row = movies[movies["doc_id"] == metadata["doc_id"]].iloc[0]
|
| 57 |
-
cover_url = movie_row.get("poster_url", PLACEHOLDER_IMAGE)
|
| 58 |
-
|
| 59 |
-
label = f"{movie_row['title']}\n\n{movie_row['description']}"
|
| 60 |
-
gallery.append((cover_url, label))
|
| 61 |
-
|
| 62 |
-
return gallery
|
| 63 |
-
|
| 64 |
-
# 🔹 Gradio app
|
| 65 |
-
with gr.Blocks(theme=gr.themes.Glass()) as demo:
|
| 66 |
-
gr.Markdown("# ๐ฌ Semantic Movie Recommender (Online Posters)")
|
| 67 |
-
|
| 68 |
-
with gr.Row():
|
| 69 |
-
user_query = gr.Textbox(label="Describe your ideal movie", placeholder="e.g., A sci-fi movie about time travel")
|
| 70 |
-
submit_button = gr.Button("๐ Find recommendations")
|
| 71 |
-
|
| 72 |
-
gr.Markdown("## ๐ฟ Recommended Movies")
|
| 73 |
-
output = gr.Gallery(label="Movies", columns=3, rows=3, show_label=True)
|
| 74 |
-
|
| 75 |
-
submit_button.click(fn=retrieve_semantic_recommendations, inputs=user_query, outputs=output)
|
| 76 |
-
|
| 77 |
-
if __name__ == "__main__":
|
| 78 |
-
demo.launch()
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from langchain_chroma import Chroma
|
| 3 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
+
from sentence_transformers import CrossEncoder
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
# 🔹 Paths
|
| 9 |
+
CHROMA_DIR = "chroma_movies_bge"  # local Chroma directory
CSV_PATH = "FINALE_NEW_WITH_IMAGES.csv"  # CSV with poster URLs
PLACEHOLDER_IMAGE = "placeholder.png"  # used when a poster is missing

# Load the movie table and expose each row's index label as a "doc_id"
# column, which the recommendation code uses to look rows up.
movies = pd.read_csv(CSV_PATH, encoding="utf-8")
movies["doc_id"] = movies.index

# Run the models on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Embedding model used to embed queries for the vector search.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-m3",
    model_kwargs=dict(device=device),
    encode_kwargs=dict(normalize_embeddings=True),
)

# Open the persisted Chroma index with the same embedding function.
db_movies = Chroma(
    persist_directory=CHROMA_DIR,
    embedding_function=embeddings,
)

# Cross-encoder that re-scores (query, document) pairs after retrieval.
reranker = CrossEncoder("cross-encoder/stsb-roberta-large", device=device)
|
| 35 |
+
|
| 36 |
+
# 🔹 Function for semantic recommendations
|
| 37 |
+
def retrieve_semantic_recommendations(query: str, top_k: int = 10):
    """Return gallery items for the movies that best match a free-text query.

    The 50 nearest documents are fetched from the Chroma index, re-scored
    with the cross-encoder and de-duplicated per ``doc_id`` (best score
    wins). The best-scoring movies are then joined with their CSV rows.

    Args:
        query: Free-text description typed by the user.
        top_k: Maximum number of movies to return.

    Returns:
        A list of ``(poster, label)`` tuples, best match first. ``label`` is
        the title and description separated by a blank line. The list is
        empty when the query is blank, ``top_k`` is not positive, or nothing
        usable was retrieved.
    """
    if not query or not query.strip() or top_k <= 0:
        return []

    recs = db_movies.similarity_search(query, k=50)
    if not recs:
        # Nothing to rerank; do not hand the cross-encoder an empty batch.
        return []

    pairs = [(query, rec.page_content) for rec in recs]
    scores = reranker.predict(pairs, batch_size=8)

    # Keep only the best score per movie; several hits may share a doc_id.
    best_by_doc = {}
    for rec, score in zip(recs, scores):
        doc_id = rec.metadata.get("doc_id")
        if doc_id is None:
            # Indexed document without a doc_id cannot be joined to the CSV.
            continue
        if doc_id not in best_by_doc or score > best_by_doc[doc_id]:
            best_by_doc[doc_id] = score

    gallery = []
    for doc_id in sorted(best_by_doc, key=best_by_doc.get, reverse=True):
        matches = movies[movies["doc_id"] == doc_id]
        if matches.empty:
            # Index entry with no CSV row; skip it and try the next candidate.
            continue
        movie_row = matches.iloc[0]

        # Series.get only falls back when the column is absent, so an empty
        # cell (NaN or blank string) has to be replaced explicitly.
        cover_url = movie_row.get("poster_url", PLACEHOLDER_IMAGE)
        if pd.isna(cover_url) or not str(cover_url).strip():
            cover_url = PLACEHOLDER_IMAGE

        label = f"{movie_row['title']}\n\n{movie_row['description']}"
        gallery.append((cover_url, label))
        if len(gallery) == top_k:
            break

    return gallery
|
| 63 |
+
|
| 64 |
+
# 🔹 Gradio app
|
| 65 |
+
# Build the UI: a query box and a button on one row, with the result gallery
# below. The emoji in the user-facing strings were mis-decoded in the source
# and are restored here.
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    gr.Markdown("# 🎬 Semantic Movie Recommender (Online Posters)")

    with gr.Row():
        user_query = gr.Textbox(
            label="Describe your ideal movie",
            placeholder="e.g., A sci-fi movie about time travel",
        )
        submit_button = gr.Button("🔍 Find recommendations")

    gr.Markdown("## 🍿 Recommended Movies")
    output = gr.Gallery(label="Movies", columns=3, rows=3, show_label=True)

    # Clicking the button runs the recommender and fills the gallery.
    submit_button.click(
        fn=retrieve_semantic_recommendations,
        inputs=user_query,
        outputs=output,
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|