tomyvo committed on
Commit
b5e7f9f
·
verified ·
1 Parent(s): bc03400

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -78
app.py CHANGED
@@ -1,78 +1,78 @@
1
- import pandas as pd
2
- from langchain_chroma import Chroma
3
- from langchain_huggingface import HuggingFaceEmbeddings
4
- from sentence_transformers import CrossEncoder
5
- import gradio as gr
6
- import torch
7
-
8
- # 🔹 Pfade
9
- CHROMA_DIR = r"C:\Unkram123\Full_stack_projects\erkam_netflix\FINALE\chroma_movies_bge" # Lokaler Chroma-Ordner
10
- CSV_PATH = r"C:\Unkram123\Full_stack_projects\erkam_netflix\FINALE\FINALE_NEW_WITH_IMAGES.csv" # CSV mit Poster URLs
11
- PLACEHOLDER_IMAGE = r"C:\Unkram123\Full_stack_projects\erkam_netflix\FINALE\placeholder.png" # Falls Poster fehlt
12
-
13
- # 🔹 CSV laden
14
- movies = pd.read_csv(CSV_PATH, encoding="utf-8")
15
- movies["doc_id"] = movies.index
16
-
17
- # 🔹 CUDA Check
18
- device = "cuda" if torch.cuda.is_available() else "cpu"
19
-
20
- # 🔹 Embeddings laden
21
- embeddings = HuggingFaceEmbeddings(
22
- model_name="BAAI/bge-m3",
23
- model_kwargs = {"device": device},
24
- encode_kwargs={"normalize_embeddings": True}
25
- )
26
-
27
- # 🔹 Chroma-Index laden
28
- db_movies = Chroma(
29
- persist_directory=CHROMA_DIR,
30
- embedding_function=embeddings
31
- )
32
-
33
- # 🔹 Cross-Encoder laden
34
- reranker = CrossEncoder("cross-encoder/stsb-roberta-large", device=device)
35
-
36
- # 🔹 Funktion für semantische Empfehlungen
37
- def retrieve_semantic_recommendations(query: str, top_k: int = 10):
38
- recs = db_movies.similarity_search(query, k=50)
39
- pairs = [(query, rec.page_content) for rec in recs]
40
- scores = reranker.predict(pairs, batch_size=8)
41
-
42
- scores_dict = {}
43
- for rec, score in zip(recs, scores):
44
- doc_id = rec.metadata["doc_id"]
45
- if doc_id not in scores_dict or score > scores_dict[doc_id]["score"]:
46
- scores_dict[doc_id] = {"score": score, "rec": rec}
47
-
48
- unique_top_recs = sorted(scores_dict.values(), key=lambda x: x["score"], reverse=True)[:top_k]
49
-
50
- gallery = []
51
- for item in unique_top_recs:
52
- rec = item["rec"]
53
- metadata = rec.metadata
54
-
55
- # Poster aus CSV anhand doc_id
56
- movie_row = movies[movies["doc_id"] == metadata["doc_id"]].iloc[0]
57
- cover_url = movie_row.get("poster_url", PLACEHOLDER_IMAGE)
58
-
59
- label = f"{movie_row['title']}\n\n{movie_row['description']}"
60
- gallery.append((cover_url, label))
61
-
62
- return gallery
63
-
64
- # 🔹 Gradio-App
65
- with gr.Blocks(theme=gr.themes.Glass()) as demo:
66
- gr.Markdown("# 🎬 Semantic Movie Recommender (Online Posters)")
67
-
68
- with gr.Row():
69
- user_query = gr.Textbox(label="Describe your ideal movie", placeholder="e.g., A sci-fi movie about time travel")
70
- submit_button = gr.Button("🔍 Find recommendations")
71
-
72
- gr.Markdown("## 🍿 Recommended Movies")
73
- output = gr.Gallery(label="Movies", columns=3, rows=3, show_label=True)
74
-
75
- submit_button.click(fn=retrieve_semantic_recommendations, inputs=user_query, outputs=output)
76
-
77
- if __name__ == "__main__":
78
- demo.launch()
 
1
+ import pandas as pd
2
+ from langchain_chroma import Chroma
3
+ from langchain_huggingface import HuggingFaceEmbeddings
4
+ from sentence_transformers import CrossEncoder
5
+ import gradio as gr
6
+ import torch
7
+
8
+ # 🔹 Pfade
9
+ CHROMA_DIR = "chroma_movies_bge" # Lokaler Chroma-Ordner
10
+ CSV_PATH = "FINALE_NEW_WITH_IMAGES.csv" # CSV mit Poster URLs
11
+ PLACEHOLDER_IMAGE = "placeholder.png" # Falls Poster fehlt
12
+
13
+ # 🔹 CSV laden
14
+ movies = pd.read_csv(CSV_PATH, encoding="utf-8")
15
+ movies["doc_id"] = movies.index
16
+
17
+ # 🔹 CUDA Check
18
+ device = "cuda" if torch.cuda.is_available() else "cpu"
19
+
20
+ # 🔹 Embeddings laden
21
+ embeddings = HuggingFaceEmbeddings(
22
+ model_name="BAAI/bge-m3",
23
+ model_kwargs = {"device": device},
24
+ encode_kwargs={"normalize_embeddings": True}
25
+ )
26
+
27
+ # 🔹 Chroma-Index laden
28
+ db_movies = Chroma(
29
+ persist_directory=CHROMA_DIR,
30
+ embedding_function=embeddings
31
+ )
32
+
33
+ # 🔹 Cross-Encoder laden
34
+ reranker = CrossEncoder("cross-encoder/stsb-roberta-large", device=device)
35
+
36
+ # 🔹 Funktion für semantische Empfehlungen
37
+ def retrieve_semantic_recommendations(query: str, top_k: int = 10):
38
+ recs = db_movies.similarity_search(query, k=50)
39
+ pairs = [(query, rec.page_content) for rec in recs]
40
+ scores = reranker.predict(pairs, batch_size=8)
41
+
42
+ scores_dict = {}
43
+ for rec, score in zip(recs, scores):
44
+ doc_id = rec.metadata["doc_id"]
45
+ if doc_id not in scores_dict or score > scores_dict[doc_id]["score"]:
46
+ scores_dict[doc_id] = {"score": score, "rec": rec}
47
+
48
+ unique_top_recs = sorted(scores_dict.values(), key=lambda x: x["score"], reverse=True)[:top_k]
49
+
50
+ gallery = []
51
+ for item in unique_top_recs:
52
+ rec = item["rec"]
53
+ metadata = rec.metadata
54
+
55
+ # Poster aus CSV anhand doc_id
56
+ movie_row = movies[movies["doc_id"] == metadata["doc_id"]].iloc[0]
57
+ cover_url = movie_row.get("poster_url", PLACEHOLDER_IMAGE)
58
+
59
+ label = f"{movie_row['title']}\n\n{movie_row['description']}"
60
+ gallery.append((cover_url, label))
61
+
62
+ return gallery
63
+
64
+ # 🔹 Gradio-App
65
+ with gr.Blocks(theme=gr.themes.Glass()) as demo:
66
+ gr.Markdown("# 🎬 Semantic Movie Recommender (Online Posters)")
67
+
68
+ with gr.Row():
69
+ user_query = gr.Textbox(label="Describe your ideal movie", placeholder="e.g., A sci-fi movie about time travel")
70
+ submit_button = gr.Button("🔍 Find recommendations")
71
+
72
+ gr.Markdown("## 🍿 Recommended Movies")
73
+ output = gr.Gallery(label="Movies", columns=3, rows=3, show_label=True)
74
+
75
+ submit_button.click(fn=retrieve_semantic_recommendations, inputs=user_query, outputs=output)
76
+
77
+ if __name__ == "__main__":
78
+ demo.launch()