Spaces:
Sleeping
Sleeping
improve inference
Browse files
app.py
CHANGED
|
@@ -11,7 +11,11 @@ global df
|
|
| 11 |
|
| 12 |
# Load the static embeddings model from HuggingFace hub
|
| 13 |
model_name = "sentence-transformers/static-retrieval-mrl-en-v1"
|
| 14 |
-
model = SentenceTransformer(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def get_iframe(hub_repo_id):
|
|
@@ -58,7 +62,7 @@ def vectorize_dataset(hub_repo_id: str, split: str, column: str):
|
|
| 58 |
gr.Info("Vectorizing dataset...")
|
| 59 |
ds = load_dataset(hub_repo_id)
|
| 60 |
df = ds[split].to_polars()
|
| 61 |
-
embeddings = model.encode(df[column].cast(str))
|
| 62 |
return embeddings
|
| 63 |
|
| 64 |
|
|
|
|
| 11 |
|
| 12 |
# Load the static embeddings model from HuggingFace hub
|
| 13 |
model_name = "sentence-transformers/static-retrieval-mrl-en-v1"
|
| 14 |
+
model = SentenceTransformer(
|
| 15 |
+
model_name,
|
| 16 |
+
device="cpu",
|
| 17 |
+
tokenizer_kwargs={"model_max_length": 512},
|
| 18 |
+
)
|
| 19 |
|
| 20 |
|
| 21 |
def get_iframe(hub_repo_id):
|
|
|
|
| 62 |
gr.Info("Vectorizing dataset...")
|
| 63 |
ds = load_dataset(hub_repo_id)
|
| 64 |
df = ds[split].to_polars()
|
| 65 |
+
embeddings = model.encode(df[column].cast(str), show_progress_bar=True, batch_size=128)
|
| 66 |
return embeddings
|
| 67 |
|
| 68 |
|