fdaudens committed (verified)
Commit df910d2 · Parent(s): aa877c7

Update app.py

Files changed (1): app.py +0 -45
app.py CHANGED
@@ -99,39 +99,6 @@ def do_similarity(text_a: str, text_b: str, dims: int = DEFAULT_DIMS) -> float:
     b = model.encode_document([text_b], normalize_embeddings=True, convert_to_numpy=True)[0][:dims]
     return float(np.dot(a, b))
 
-# Extractive summarization using EmbeddingGemma's Summarization prompt
-def _split_sents(text: str):
-    parts = re.split(r"(?<=[\.!?])\s+", text.strip())
-    return [p.strip() for p in parts if p.strip()]
-
-def summarize_extractive(text: str, n: int, dims: int, lambda_diversity: float = 0.7) -> str:
-    sents = _split_sents(text)
-    if not sents:
-        return ""
-    embs = model.encode(
-        sents,
-        prompt_name="Summarization",
-        normalize_embeddings=True,
-        convert_to_numpy=True,
-        batch_size=128,
-    )[:, :dims]
-    centroid = embs.mean(axis=0)
-    base = embs @ centroid
-
-    picked = []
-    for _ in range(min(n, len(sents))):
-        if not picked:
-            i = int(np.argmax(base))
-        else:
-            sim_to_sel = np.max(embs[picked] @ embs.T, axis=0)
-            mmr = (1 - lambda_diversity) * base + lambda_diversity * (1 - sim_to_sel)
-            i = int(np.argmax(mmr))
-        picked.append(i)
-        base[i] = -1e9
-    # keep original order
-    ordered = [s for _, s in sorted(zip(picked, [sents[i] for i in picked]))]
-    return " ".join(ordered)
-
 # ---------- Gradio UI ----------
 with gr.Blocks(title="EmbeddingGemma × Wikipedia (EN corpus)") as demo:
     gr.Markdown(
@@ -144,7 +111,6 @@ with gr.Blocks(title="EmbeddingGemma × Wikipedia (EN corpus)") as demo:
         - **Semantic search** (English queries)
         - **Cross-lingual search** (queries in other languages → English articles)
         - **Sentence similarity** (compare two texts)
-        - **Extractive summarization** (highlight key sentences from long text)
 
         🔗 Learn more in the [EmbeddingGemma blog post](https://huggingface.co/blog/embeddinggemma).
         """
@@ -182,16 +148,5 @@ with gr.Blocks(title="EmbeddingGemma × Wikipedia (EN corpus)") as demo:
         sim_out = gr.Number(label="Cosine similarity (-1..1)")
         sim_btn.click(lambda x, y, d: do_similarity(x, y, int(d)), [a, b, dims2], sim_out)
 
-    # 4) Summarization (extractive)
-    with gr.TabItem("Summarization"):
-        gr.Markdown("**Extractive summarization** using EmbeddingGemma's `Summarization` prompt. Paste any long text.")
-        with gr.Row():
-            sum_dims = gr.Dropdown([str(d) for d in MATRYOSHKA_DIMS], value=str(DEFAULT_DIMS), label="Embedding dims")
-            sum_n = gr.Slider(1, 10, value=5, step=1, label="Sentences in summary")
-        sum_text = gr.Textbox(lines=12, label="Text to summarize", value="Paste a Wikipedia article (or any text) here…")
-        sum_btn = gr.Button("Summarize")
-        sum_out = gr.Textbox(lines=10, label="Summary")
-        sum_btn.click(lambda t, n, d: summarize_extractive(t, int(n), int(d)), [sum_text, sum_n, sum_dims], sum_out)
-
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
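
For reference, a minimal standalone sketch of the MMR selection step that the removed summarize_extractive() performed. It runs on random unit-normalized vectors instead of EmbeddingGemma outputs so it needs only NumPy; the names embs, n, and lambda_diversity mirror the removed code, and the toy data is purely illustrative.

import numpy as np

rng = np.random.default_rng(0)
embs = rng.normal(size=(8, 16))                      # 8 "sentence" vectors, 16 dims
embs /= np.linalg.norm(embs, axis=1, keepdims=True)  # unit-normalize rows

n, lambda_diversity = 3, 0.7
centroid = embs.mean(axis=0)
base = embs @ centroid        # relevance: similarity of each sentence to the centroid

picked = []
for _ in range(min(n, len(embs))):
    if not picked:
        i = int(np.argmax(base))
    else:
        # max similarity of every sentence to the already-selected set
        sim_to_sel = np.max(embs[picked] @ embs.T, axis=0)
        # MMR: trade relevance against redundancy with the selected set
        mmr = (1 - lambda_diversity) * base + lambda_diversity * (1 - sim_to_sel)
        i = int(np.argmax(mmr))
    picked.append(i)
    base[i] = -1e9            # sink the score so this index is never reselected

print(sorted(picked))         # restore original sentence order, as the app did

With lambda_diversity = 0.7 the selection leans toward diverse sentences; at 0.0 it reduces to repeatedly picking the sentences most similar to the centroid.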
 