mlabonne committed on
Commit
525a58f
·
verified ·
1 Parent(s): 5bcb7c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -9,7 +9,6 @@ import gradio as gr
9
  from pylate import indexes, models, retrieve
10
  from documents import MULTILINGUAL_DOCUMENTS
11
 
12
- # Configure logging
13
  logging.basicConfig(
14
  level=logging.INFO,
15
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -26,14 +25,13 @@ class CrossLingualRetriever:
26
 
27
  self.model = models.ColBERT(model_name_or_path=model_name)
28
 
29
- # Set padding token if not present
30
- if self.model.tokenizer.pad_token is None and hasattr(self.model.tokenizer, "eos_token"):
31
- self.model.tokenizer.pad_token = self.model.tokenizer.eos_token
32
 
33
  # Initialize PLAID index
34
  self.index = indexes.PLAID(
35
  index_folder="pylate-index",
36
- index_name="cross_lingual_index",
37
  override=True,
38
  )
39
 
@@ -163,24 +161,22 @@ def search_documents(query: str, top_k: int) -> Tuple[str, str]:
163
 
164
  # Example queries in different languages
165
  EXAMPLE_QUERIES = [
166
- ["What is artificial intelligence?", 5],
167
- ["¿Qué es el cambio climático?", 5],
168
- ["양자 컴퓨팅이란 무엇인가요?", 5],
169
  ["ما هي الصحة النفسية؟", 5],
170
- ["量子计算是什么?", 5],
171
  ]
172
 
173
 
174
  # Build Gradio interface
175
- with gr.Blocks(title="Cross-Lingual Retrieval Demo", theme=gr.themes.Soft()) as demo:
176
  gr.Markdown(
177
  """
178
  # 🌍 Cross-Lingual Document Retrieval
179
  ### Powered by [LiquidAI/LFM2-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2-ColBERT-350M)
180
 
181
- Search for documents in any language using queries in any language!
182
-
183
- The model finds semantically similar documents regardless of the language mismatch.
184
 
185
  **Supported Languages:** English, Arabic, Chinese, French, German, Japanese, Korean, and Spanish
186
  """
@@ -196,7 +192,7 @@ with gr.Blocks(title="Cross-Lingual Retrieval Demo", theme=gr.themes.Soft()) as
196
 
197
  top_k_slider = gr.Slider(
198
  minimum=1,
199
- maximum=10,
200
  value=5,
201
  step=1,
202
  label="Number of results to retrieve",
@@ -244,7 +240,7 @@ with gr.Blocks(title="Cross-Lingual Retrieval Demo", theme=gr.themes.Soft()) as
244
  )
245
 
246
  # Examples section
247
- gr.Markdown("### 💡 Try these example queries:")
248
  gr.Examples(
249
  examples=EXAMPLE_QUERIES,
250
  inputs=[query_input, top_k_slider],
@@ -255,7 +251,6 @@ with gr.Blocks(title="Cross-Lingual Retrieval Demo", theme=gr.themes.Soft()) as
255
 
256
  gr.Markdown(
257
  """
258
- ---
259
  **How it works:** This demo uses the LiquidAI LFM2-ColBERT-350M model with late interaction retrieval.
260
  The model encodes both queries and documents into token-level embeddings, enabling fine-grained matching
261
  across languages with high speed and accuracy.
 
9
  from pylate import indexes, models, retrieve
10
  from documents import MULTILINGUAL_DOCUMENTS
11
 
 
12
  logging.basicConfig(
13
  level=logging.INFO,
14
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 
25
 
26
  self.model = models.ColBERT(model_name_or_path=model_name)
27
 
28
+ # Set padding token
29
+ self.model.tokenizer.pad_token = self.model.tokenizer.eos_token
 
30
 
31
  # Initialize PLAID index
32
  self.index = indexes.PLAID(
33
  index_folder="pylate-index",
34
+ index_name="index",
35
  override=True,
36
  )
37
 
 
161
 
162
  # Example queries in different languages
163
  EXAMPLE_QUERIES = [
164
+ ["What is artificial intelligence?", 8],
165
+ ["¿Qué es el cambio climático?", 4],
166
+ ["양자 컴퓨팅이란 무엇인가요?", 6],
167
  ["ما هي الصحة النفسية؟", 5],
168
+ ["量子计算是什么?", 8],
169
  ]
170
 
171
 
172
  # Build Gradio interface
173
+ with gr.Blocks(title="Cross-Lingual Retrieval Demo", theme=gr.themes.Soft(primary_hue="purple")) as demo:
174
  gr.Markdown(
175
  """
176
  # 🌍 Cross-Lingual Document Retrieval
177
  ### Powered by [LiquidAI/LFM2-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2-ColBERT-350M)
178
 
179
+ Find semantically similar documents across different languages.
 
 
180
 
181
  **Supported Languages:** English, Arabic, Chinese, French, German, Japanese, Korean, and Spanish
182
  """
 
192
 
193
  top_k_slider = gr.Slider(
194
  minimum=1,
195
+ maximum=12,
196
  value=5,
197
  step=1,
198
  label="Number of results to retrieve",
 
240
  )
241
 
242
  # Examples section
243
+ gr.Markdown("### 💡 Example queries:")
244
  gr.Examples(
245
  examples=EXAMPLE_QUERIES,
246
  inputs=[query_input, top_k_slider],
 
251
 
252
  gr.Markdown(
253
  """
 
254
  **How it works:** This demo uses the LiquidAI LFM2-ColBERT-350M model with late interaction retrieval.
255
  The model encodes both queries and documents into token-level embeddings, enabling fine-grained matching
256
  across languages with high speed and accuracy.