radoslavralev committed on
Commit
7fe55cc
·
verified ·
1 Parent(s): 80f0787

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 768,
3
- "pooling_mode_cls_token": true,
4
- "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -12,54 +12,51 @@ tags:
12
  - retrieval
13
  - reranking
14
  - generated_from_trainer
15
- - dataset_size:8193277
16
- - loss:CosineSimilarityLoss
17
- base_model: Alibaba-NLP/gte-modernbert-base
18
  widget:
19
- - source_sentence: They are sometimes called Marg or also Path in Hindi .
 
20
  sentences:
21
- - Largs was born in Brisbane House in Noddsdale , near Brisbane in Ayrshire , Scotland
22
- , the son of Sir Thomas Brisbane and Dame Eleanora Brisbane .
23
- - Its smallest radius is 1.4 ( 131 thousand light years ) and largest 0.7 angle
24
- minutes ( 65 thousand light years ) .
25
- - They are also called Marg or sometimes the path in the Hindi .
26
- - source_sentence: The main mode of play in `` Crash Bash `` is the Adventure Mode
27
- , in which one or two players must win all 28 levels to complete .
 
28
  sentences:
29
- - Parkton is a city in Robeson County , North Carolina , in the Lumberton Metro
30
- area , in the United States .
31
- - The CANTAB tests were developed by Professor Barbara Sahakian and Professor Trevor
32
- Robbins .
33
- - The main mode in `` Crash Bash `` is the adventure mode in which one or two players
34
- must complete all 28 levels to win .
35
- - source_sentence: It was formed in December 2014 from elements of the disbanded 51st
36
- Mechanized Brigade and newly mobilized units .
37
  sentences:
38
- - It had branches in feature films , television , physical and digital publishing
39
- , merchandise , recorded music , digital and online media applications and mobile
40
- and social games .
41
- - Notts County and Arsenal were relegated to the Second Division ; Preston North
42
- End and Burnley were promoted to the First Division .
43
- - It was formed in December 2014 from elements of the dissolved 51st Mechanized
44
- Brigade and newly mobilized units .
45
- - source_sentence: The band pursued `` signals `` in January 2012 in three weeks ,
46
- and drums were recorded in a day and a half .
47
  sentences:
48
- - Kearsarge Lakes , Kearsarge Pass Trail , and Rae Lakes all have a maximum 2 nights
49
- stay , and Bullfrog Lake along the Charlotte Lake is closed to camping .
50
- - The band tracked `` Signals `` in three weeks in January 2012 . Drums were recorded
51
- in a day and a half .
52
- - From 1954 to 1961 , he was married to Stella Caralis and from 1978 until his death
53
- with Nina Bohlen .
54
- - source_sentence: A special case is of the Country B loyalist who controls agents
55
- or provides managerial supporting or other functions against Country A .
56
  sentences:
57
- - A special case is the loyalist of Country B , who controls agents or provides
58
- management support or other functions against Country A .
59
- - Music Story is a music service website and international music data provider that
60
- curates , aggregates and analyses metadata for digital music services .
61
- - These six cars were painted in the same lacquering as the buffet cars , silver
62
- with red lines and text .
63
  datasets:
64
  - redis/langcache-sentencepairs-v2
65
  pipeline_tag: sentence-similarity
@@ -84,42 +81,42 @@ model-index:
84
  type: test
85
  metrics:
86
  - type: cosine_accuracy@1
87
- value: 0.5861241448475948
88
  name: Cosine Accuracy@1
89
  - type: cosine_precision@1
90
- value: 0.5861241448475948
91
  name: Cosine Precision@1
92
  - type: cosine_recall@1
93
- value: 0.5679885764966713
94
  name: Cosine Recall@1
95
  - type: cosine_ndcg@10
96
- value: 0.7729838064849864
97
  name: Cosine Ndcg@10
98
  - type: cosine_mrr@1
99
- value: 0.5861241448475948
100
  name: Cosine Mrr@1
101
  - type: cosine_map@100
102
- value: 0.7216697804426214
103
  name: Cosine Map@100
104
  - type: cosine_auc_precision_cache_hit_ratio
105
- value: 0.34889374678008206
106
  name: Cosine Auc Precision Cache Hit Ratio
107
  - type: cosine_auc_similarity_distribution
108
- value: 0.15252445696542008
109
  name: Cosine Auc Similarity Distribution
110
  ---
111
 
112
  # Redis fine-tuned BiEncoder model for semantic caching on LangCache
113
 
114
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) on the [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for sentence pair similarity.
115
 
116
  ## Model Details
117
 
118
  ### Model Description
119
  - **Model Type:** Sentence Transformer
120
- - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
121
  - **Maximum Sequence Length:** 100 tokens
122
- - **Output Dimensionality:** 768 dimensions
123
  - **Similarity Function:** Cosine Similarity
124
  - **Training Dataset:**
125
  - [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
@@ -137,7 +134,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
137
  ```
138
  SentenceTransformer(
139
  (0): Transformer({'max_seq_length': 100, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
140
- (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
141
  )
142
  ```
143
 
@@ -159,20 +156,20 @@ from sentence_transformers import SentenceTransformer
159
  model = SentenceTransformer("redis/langcache-embed-experimental")
160
  # Run inference
161
  sentences = [
162
- 'A special case is of the Country B loyalist who controls agents or provides managerial supporting or other functions against Country A .',
163
- 'A special case is the loyalist of Country B , who controls agents or provides management support or other functions against Country A .',
164
- 'Music Story is a music service website and international music data provider that curates , aggregates and analyses metadata for digital music services .',
165
  ]
166
  embeddings = model.encode(sentences)
167
  print(embeddings.shape)
168
- # [3, 768]
169
 
170
  # Get the similarity scores for the embeddings
171
  similarities = model.similarity(embeddings, embeddings)
172
  print(similarities)
173
- # tensor([[1.0000, 0.9844, 0.5195],
174
- # [0.9844, 0.9922, 0.5078],
175
- # [0.5195, 0.5078, 0.9922]], dtype=torch.bfloat16)
176
  ```
177
 
178
  <!--
@@ -208,16 +205,16 @@ You can finetune this model on your own dataset.
208
  * Dataset: `test`
209
  * Evaluated with <code>ir_evaluator.CustomInformationRetrievalEvaluator</code>
210
 
211
- | Metric | Value |
212
- |:-------------------------------------|:----------|
213
- | cosine_accuracy@1 | 0.5861 |
214
- | cosine_precision@1 | 0.5861 |
215
- | cosine_recall@1 | 0.568 |
216
- | **cosine_ndcg@10** | **0.773** |
217
- | cosine_mrr@1 | 0.5861 |
218
- | cosine_map@100 | 0.7217 |
219
- | cosine_auc_precision_cache_hit_ratio | 0.3489 |
220
- | cosine_auc_similarity_distribution | 0.1525 |
221
 
222
  <!--
223
  ## Bias, Risks and Limitations
@@ -238,23 +235,25 @@ You can finetune this model on your own dataset.
238
  #### LangCache Sentence Pairs (all)
239
 
240
  * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
241
- * Size: 72,021 training samples
242
- * Columns: <code>sentence_a</code>, <code>sentence_b</code>, and <code>label</code>
243
  * Approximate statistics based on the first 1000 samples:
244
- | | sentence_a | sentence_b | label |
245
- |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
246
- | type | string | string | int |
247
- | details | <ul><li>min: 8 tokens</li><li>mean: 27.46 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 27.36 tokens</li><li>max: 52 tokens</li></ul> | <ul><li>0: ~50.30%</li><li>1: ~49.70%</li></ul> |
248
  * Samples:
249
- | sentence_a | sentence_b | label |
250
- |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
251
- | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code> | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code> | <code>1</code> |
252
- | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> | <code>0</code> |
253
- | <code>After losing his second election , he resigned as opposition leader and was replaced by Geoff Pearsall .</code> | <code>Max Bingham resigned as opposition leader after losing his second election , and was replaced by Geoff Pearsall .</code> | <code>1</code> |
254
- * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
255
  ```json
256
  {
257
- "loss_fct": "torch.nn.modules.loss.BCELoss"
 
 
258
  }
259
  ```
260
 
@@ -263,30 +262,32 @@ You can finetune this model on your own dataset.
263
  #### LangCache Sentence Pairs (all)
264
 
265
  * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
266
- * Size: 72,021 evaluation samples
267
- * Columns: <code>sentence_a</code>, <code>sentence_b</code>, and <code>label</code>
268
  * Approximate statistics based on the first 1000 samples:
269
- | | sentence_a | sentence_b | label |
270
- |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
271
- | type | string | string | int |
272
- | details | <ul><li>min: 8 tokens</li><li>mean: 27.46 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 27.36 tokens</li><li>max: 52 tokens</li></ul> | <ul><li>0: ~50.30%</li><li>1: ~49.70%</li></ul> |
273
  * Samples:
274
- | sentence_a | sentence_b | label |
275
- |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
276
- | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code> | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code> | <code>1</code> |
277
- | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> | <code>0</code> |
278
- | <code>After losing his second election , he resigned as opposition leader and was replaced by Geoff Pearsall .</code> | <code>Max Bingham resigned as opposition leader after losing his second election , and was replaced by Geoff Pearsall .</code> | <code>1</code> |
279
- * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
280
  ```json
281
  {
282
- "loss_fct": "torch.nn.modules.loss.BCELoss"
 
 
283
  }
284
  ```
285
 
286
  ### Training Logs
287
  | Epoch | Step | test_cosine_ndcg@10 |
288
  |:-----:|:----:|:-------------------:|
289
- | -1 | -1 | 0.7730 |
290
 
291
 
292
  ### Framework Versions
 
12
  - retrieval
13
  - reranking
14
  - generated_from_trainer
15
+ - dataset_size:9233417
16
+ - loss:ArcFaceInBatchLoss
17
+ base_model: answerdotai/ModernBERT-large
18
  widget:
19
+ - source_sentence: Hayley Vaughan portrayed Ripa on the ABC daytime soap opera , ``
20
+ All My Children `` , between 1990 and 2002 .
21
  sentences:
22
+ - Traxxpad is a music application for Sony 's PlayStation Portable published by
23
+ Definitive Studios and developed by Eidos Interactive .
24
+ - Between 1990 and 2002 , Hayley Vaughan Ripa portrayed in the ABC soap opera ``
25
+ All My Children `` .
26
+ - Between 1990 and 2002 , Ripa Hayley portrayed Vaughan in the ABC soap opera ``
27
+ All My Children `` .
28
+ - source_sentence: Olivella monilifera is a species of dwarf sea snail , small gastropod
29
+ mollusk in the family Olivellidae , the marine olives .
30
  sentences:
31
+ - Olivella monilifera is a species of the dwarf - sea snail , small gastropod mollusk
32
+ in the Olivellidae family , the marine olives .
33
+ - He was cut by the Browns after being signed by the Bills in 2013 . He was later
34
+ released .
35
+ - Olivella monilifera is a kind of sea snail , marine gastropod mollusk in the Olivellidae
36
+ family , the dwarf olives .
37
+ - source_sentence: Hayashi said that Mackey `` is a sort of `` of the original model
38
+ for Tenchi .
39
  sentences:
40
+ - In the summer of 2009 , Ellick shot a documentary about Malala Yousafzai .
41
+ - Hayashi said that Mackey is `` sort of `` the original model for Tenchi .
42
+ - Mackey said that Hayashi is `` sort of `` the original model for Tenchi .
43
+ - source_sentence: Much of the film was shot on location in Los Angeles and in nearby
44
+ Burbank and Glendale .
 
 
 
 
45
  sentences:
46
+ - Much of the film was shot on location in Los Angeles and in nearby Burbank and
47
+ Glendale .
48
+ - Much of the film was shot on site in Burbank and Glendale and in the nearby Los
49
+ Angeles .
50
+ - Traxxpad is a music application for the Sony PlayStation Portable developed by
51
+ the Definitive Studios and published by Eidos Interactive .
52
+ - source_sentence: According to him , the earth is the carrier of his artistic work
53
+ , which is only integrated into the creative process by minimal changes .
54
  sentences:
55
+ - National players are Bold players .
56
+ - According to him , earth is the carrier of his artistic work being integrated
57
+ into the creative process only by minimal changes .
58
+ - According to him , earth is the carrier of his creative work being integrated
59
+ into the artistic process only by minimal changes .
 
60
  datasets:
61
  - redis/langcache-sentencepairs-v2
62
  pipeline_tag: sentence-similarity
 
81
  type: test
82
  metrics:
83
  - type: cosine_accuracy@1
84
+ value: 0.44081091729646477
85
  name: Cosine Accuracy@1
86
  - type: cosine_precision@1
87
+ value: 0.44081091729646477
88
  name: Cosine Precision@1
89
  - type: cosine_recall@1
90
+ value: 0.42663486382682986
91
  name: Cosine Recall@1
92
  - type: cosine_ndcg@10
93
+ value: 0.6274011007244752
94
  name: Cosine Ndcg@10
95
  - type: cosine_mrr@1
96
+ value: 0.44081091729646477
97
  name: Cosine Mrr@1
98
  - type: cosine_map@100
99
+ value: 0.5749605963252064
100
  name: Cosine Map@100
101
  - type: cosine_auc_precision_cache_hit_ratio
102
+ value: 0.27130175854619276
103
  name: Cosine Auc Precision Cache Hit Ratio
104
  - type: cosine_auc_similarity_distribution
105
+ value: 0.40770905754259995
106
  name: Cosine Auc Similarity Distribution
107
  ---
108
 
109
  # Redis fine-tuned BiEncoder model for semantic caching on LangCache
110
 
111
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large) on the [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2) dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for sentence-pair similarity.
112
 
113
  ## Model Details
114
 
115
  ### Model Description
116
  - **Model Type:** Sentence Transformer
117
+ - **Base model:** [answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large) <!-- at revision 45bb4654a4d5aaff24dd11d4781fa46d39bf8c13 -->
118
  - **Maximum Sequence Length:** 100 tokens
119
+ - **Output Dimensionality:** 1024 dimensions
120
  - **Similarity Function:** Cosine Similarity
121
  - **Training Dataset:**
122
  - [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
 
134
  ```
135
  SentenceTransformer(
136
  (0): Transformer({'max_seq_length': 100, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
137
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
138
  )
139
  ```
140
 
 
156
  model = SentenceTransformer("redis/langcache-embed-experimental")
157
  # Run inference
158
  sentences = [
159
+ 'According to him , the earth is the carrier of his artistic work , which is only integrated into the creative process by minimal changes .',
160
+ 'According to him , earth is the carrier of his artistic work being integrated into the creative process only by minimal changes .',
161
+ 'According to him , earth is the carrier of his creative work being integrated into the artistic process only by minimal changes .',
162
  ]
163
  embeddings = model.encode(sentences)
164
  print(embeddings.shape)
165
+ # [3, 1024]
166
 
167
  # Get the similarity scores for the embeddings
168
  similarities = model.similarity(embeddings, embeddings)
169
  print(similarities)
170
+ # tensor([[1.0000, 0.9844, 0.9844],
171
+ # [0.9844, 0.9961, 0.9922],
172
+ # [0.9844, 0.9922, 0.9961]], dtype=torch.bfloat16)
173
  ```
174
 
175
  <!--
 
205
  * Dataset: `test`
206
  * Evaluated with <code>ir_evaluator.CustomInformationRetrievalEvaluator</code>
207
 
208
+ | Metric | Value |
209
+ |:-------------------------------------|:-----------|
210
+ | cosine_accuracy@1 | 0.4408 |
211
+ | cosine_precision@1 | 0.4408 |
212
+ | cosine_recall@1 | 0.4266 |
213
+ | **cosine_ndcg@10** | **0.6274** |
214
+ | cosine_mrr@1 | 0.4408 |
215
+ | cosine_map@100 | 0.575 |
216
+ | cosine_auc_precision_cache_hit_ratio | 0.2713 |
217
+ | cosine_auc_similarity_distribution | 0.4077 |
218
 
219
  <!--
220
  ## Bias, Risks and Limitations
 
235
  #### LangCache Sentence Pairs (all)
236
 
237
  * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
238
+ * Size: 126,938 training samples
239
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
240
  * Approximate statistics based on the first 1000 samples:
241
+ | | anchor | positive | negative |
242
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
243
+ | type | string | string | string |
244
+ | details | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 48 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 26.54 tokens</li><li>max: 61 tokens</li></ul> |
245
  * Samples:
246
+ | anchor | positive | negative |
247
+ |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
248
+ | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code> | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code> | <code>how can I get financial freedom as soon as possible?</code> |
249
+ | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code> | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code> | <code>The older Punts are still very much in existence today and race in the same fleets as the newer boats .</code> |
250
+ | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley , , was located at Turner Valley Bar N Ranch Airport , southwest of Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> |
251
+ * Loss: <code>losses.ArcFaceInBatchLoss</code> with these parameters:
252
  ```json
253
  {
254
+ "scale": 20.0,
255
+ "similarity_fct": "cos_sim",
256
+ "gather_across_devices": false
257
  }
258
  ```
259
 
 
262
  #### LangCache Sentence Pairs (all)
263
 
264
  * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
265
+ * Size: 126,938 evaluation samples
266
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
267
  * Approximate statistics based on the first 1000 samples:
268
+ | | anchor | positive | negative |
269
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
270
+ | type | string | string | string |
271
+ | details | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 48 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 26.54 tokens</li><li>max: 61 tokens</li></ul> |
272
  * Samples:
273
+ | anchor | positive | negative |
274
+ |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
275
+ | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code> | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code> | <code>how can I get financial freedom as soon as possible?</code> |
276
+ | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code> | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code> | <code>The older Punts are still very much in existence today and race in the same fleets as the newer boats .</code> |
277
+ | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley , , was located at Turner Valley Bar N Ranch Airport , southwest of Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> |
278
+ * Loss: <code>losses.ArcFaceInBatchLoss</code> with these parameters:
279
  ```json
280
  {
281
+ "scale": 20.0,
282
+ "similarity_fct": "cos_sim",
283
+ "gather_across_devices": false
284
  }
285
  ```
286
 
287
  ### Training Logs
288
  | Epoch | Step | test_cosine_ndcg@10 |
289
  |:-----:|:----:|:-------------------:|
290
+ | -1 | -1 | 0.6274 |
291
 
292
 
293
  ### Framework Versions
config.json CHANGED
@@ -19,10 +19,10 @@
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
21
  "hidden_activation": "gelu",
22
- "hidden_size": 768,
23
  "initializer_cutoff_factor": 2.0,
24
  "initializer_range": 0.02,
25
- "intermediate_size": 1152,
26
  "layer_norm_eps": 1e-05,
27
  "local_attention": 128,
28
  "local_rope_theta": 10000.0,
@@ -32,8 +32,8 @@
32
  "model_type": "modernbert",
33
  "norm_bias": false,
34
  "norm_eps": 1e-05,
35
- "num_attention_heads": 12,
36
- "num_hidden_layers": 22,
37
  "pad_token_id": 50283,
38
  "position_embedding_type": "absolute",
39
  "repad_logits_with_grad": false,
 
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
21
  "hidden_activation": "gelu",
22
+ "hidden_size": 1024,
23
  "initializer_cutoff_factor": 2.0,
24
  "initializer_range": 0.02,
25
+ "intermediate_size": 2624,
26
  "layer_norm_eps": 1e-05,
27
  "local_attention": 128,
28
  "local_rope_theta": 10000.0,
 
32
  "model_type": "modernbert",
33
  "norm_bias": false,
34
  "norm_eps": 1e-05,
35
+ "num_attention_heads": 16,
36
+ "num_hidden_layers": 28,
37
  "pad_token_id": 50283,
38
  "position_embedding_type": "absolute",
39
  "repad_logits_with_grad": false,
config_sentence_transformers.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "__version__": {
3
  "sentence_transformers": "5.1.0",
4
  "transformers": "4.56.0",
@@ -9,6 +10,5 @@
9
  "document": ""
10
  },
11
  "default_prompt_name": null,
12
- "similarity_fn_name": "cosine",
13
- "model_type": "SentenceTransformer"
14
  }
 
1
  {
2
+ "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.1.0",
5
  "transformers": "4.56.0",
 
10
  "document": ""
11
  },
12
  "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
 
14
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95d02211c4cca89113f9f3e93ed91f5176bf50170faa2cb835f7bfea15bb9dd2
3
- size 298041696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f84ab85561ef1687cbe92711a697e05cba61d13ec58fc3ec279bd64147715b8
3
+ size 789580328
tokenizer_config.json CHANGED
@@ -938,7 +938,7 @@
938
  "input_ids",
939
  "attention_mask"
940
  ],
941
- "model_max_length": 1000000000000000019884624838656,
942
  "pad_token": "[PAD]",
943
  "sep_token": "[SEP]",
944
  "tokenizer_class": "PreTrainedTokenizerFast",
 
938
  "input_ids",
939
  "attention_mask"
940
  ],
941
+ "model_max_length": 8192,
942
  "pad_token": "[PAD]",
943
  "sep_token": "[SEP]",
944
  "tokenizer_class": "PreTrainedTokenizerFast",