Spaces:
Running
Running
update
Browse files
modular_graph_and_candidates.py
CHANGED
|
@@ -123,15 +123,21 @@ def embedding_similarity_clusters(models_root: Path, missing: List[str], thr: fl
|
|
| 123 |
names = list(texts)
|
| 124 |
all_embeddings = []
|
| 125 |
|
| 126 |
-
print("Encoding embeddings...")
|
| 127 |
batch_size = 1
|
|
|
|
| 128 |
for i in tqdm(range(0, len(names), batch_size), desc="Models", leave=False):
|
|
|
|
|
|
|
|
|
|
| 129 |
try:
|
| 130 |
-
|
|
|
|
| 131 |
emb = model.encode(batch, convert_to_numpy=True, show_progress_bar=False)
|
| 132 |
all_embeddings.append(emb)
|
|
|
|
| 133 |
except Exception as e:
|
| 134 |
-
print(f"⚠️ GPU worker error for {
|
| 135 |
# Create zero embedding as placeholder to maintain consistency
|
| 136 |
zero_emb = np.zeros((1, model.get_sentence_embedding_dimension()), dtype=np.float32)
|
| 137 |
all_embeddings.append(zero_emb)
|
|
|
|
| 123 |
names = list(texts)
|
| 124 |
all_embeddings = []
|
| 125 |
|
| 126 |
+
print(f"Encoding embeddings for {len(names)} models...")
|
| 127 |
batch_size = 1
|
| 128 |
+
|
| 129 |
for i in tqdm(range(0, len(names), batch_size), desc="Models", leave=False):
|
| 130 |
+
model_name = names[i]
|
| 131 |
+
text_len = len(texts[model_name])
|
| 132 |
+
|
| 133 |
try:
|
| 134 |
+
print(f"Processing {model_name} (text length: {text_len})")
|
| 135 |
+
batch = [texts[model_name]]
|
| 136 |
emb = model.encode(batch, convert_to_numpy=True, show_progress_bar=False)
|
| 137 |
all_embeddings.append(emb)
|
| 138 |
+
print(f"✓ Completed {model_name}")
|
| 139 |
except Exception as e:
|
| 140 |
+
print(f"⚠️ GPU worker error for {model_name}: {type(e).__name__}: {e}")
|
| 141 |
# Create zero embedding as placeholder to maintain consistency
|
| 142 |
zero_emb = np.zeros((1, model.get_sentence_embedding_dimension()), dtype=np.float32)
|
| 143 |
all_embeddings.append(zero_emb)
|