redis/langcache-embed-experimental

@@ -1,7 +1,7 @@
 {
-    "word_embedding_dimension": 768,
-    "pooling_mode_cls_token": true,
-    "pooling_mode_mean_tokens": false,
     "pooling_mode_max_tokens": false,
     "pooling_mode_mean_sqrt_len_tokens": false,
     "pooling_mode_weightedmean_tokens": false,

 {
+    "word_embedding_dimension": 1024,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
     "pooling_mode_max_tokens": false,
     "pooling_mode_mean_sqrt_len_tokens": false,
     "pooling_mode_weightedmean_tokens": false,

README.md CHANGED Viewed

@@ -12,54 +12,51 @@ tags:
 - retrieval
 - reranking
 - generated_from_trainer
-- dataset_size:8193277
-- loss:CosineSimilarityLoss
-base_model: Alibaba-NLP/gte-modernbert-base
 widget:
-- source_sentence: They are sometimes called Marg or also Path in Hindi .
   sentences:
-  - Largs was born in Brisbane House in Noddsdale , near Brisbane in Ayrshire , Scotland
-    , the son of Sir Thomas Brisbane and Dame Eleanora Brisbane .
-  - Its smallest radius is 1.4 ( 131 thousand light years ) and largest 0.7 angle
-    minutes ( 65 thousand light years ) .
-  - They are also called Marg or sometimes the path in the Hindi .
-- source_sentence: The main mode of play in `` Crash Bash `` is the Adventure Mode
-    , in which one or two players must win all 28 levels to complete .
   sentences:
-  - Parkton is a city in Robeson County , North Carolina , in the Lumberton Metro
-    area , in the United States .
-  - The CANTAB tests were developed by Professor Barbara Sahakian and Professor Trevor
-    Robbins .
-  - The main mode in `` Crash Bash `` is the adventure mode in which one or two players
-    must complete all 28 levels to win .
-- source_sentence: It was formed in December 2014 from elements of the disbanded 51st
-    Mechanized Brigade and newly mobilized units .
   sentences:
-  - It had branches in feature films , television , physical and digital publishing
-    , merchandise , recorded music , digital and online media applications and mobile
-    and social games .
-  - Notts County and Arsenal were relegated to the Second Division ; Preston North
-    End and Burnley were promoted to the First Division .
-  - It was formed in December 2014 from elements of the dissolved 51st Mechanized
-    Brigade and newly mobilized units .
-- source_sentence: The band pursued `` signals `` in January 2012 in three weeks ,
-    and drums were recorded in a day and a half .
   sentences:
-  - Kearsarge Lakes , Kearsarge Pass Trail , and Rae Lakes all have a maximum 2 nights
-    stay , and Bullfrog Lake along the Charlotte Lake is closed to camping .
-  - The band tracked `` Signals `` in three weeks in January 2012 . Drums were recorded
-    in a day and a half .
-  - From 1954 to 1961 , he was married to Stella Caralis and from 1978 until his death
-    with Nina Bohlen .
-- source_sentence: A special case is of the Country B loyalist who controls agents
-    or provides managerial supporting or other functions against Country A .
   sentences:
-  - A special case is the loyalist of Country B , who controls agents or provides
-    management support or other functions against Country A .
-  - Music Story is a music service website and international music data provider that
-    curates , aggregates and analyses metadata for digital music services .
-  - These six cars were painted in the same lacquering as the buffet cars , silver
-    with red lines and text .
 datasets:
 - redis/langcache-sentencepairs-v2
 pipeline_tag: sentence-similarity
@@ -84,42 +81,42 @@ model-index:
       type: test
     metrics:
     - type: cosine_accuracy@1
-      value: 0.5861241448475948
       name: Cosine Accuracy@1
     - type: cosine_precision@1
-      value: 0.5861241448475948
       name: Cosine Precision@1
     - type: cosine_recall@1
-      value: 0.5679885764966713
       name: Cosine Recall@1
     - type: cosine_ndcg@10
-      value: 0.7729838064849864
       name: Cosine Ndcg@10
     - type: cosine_mrr@1
-      value: 0.5861241448475948
       name: Cosine Mrr@1
     - type: cosine_map@100
-      value: 0.7216697804426214
       name: Cosine Map@100
     - type: cosine_auc_precision_cache_hit_ratio
-      value: 0.34889374678008206
       name: Cosine Auc Precision Cache Hit Ratio
     - type: cosine_auc_similarity_distribution
-      value: 0.15252445696542008
       name: Cosine Auc Similarity Distribution
 ---
 # Redis fine-tuned BiEncoder model for semantic caching on LangCache
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) on the [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for sentence pair similarity.
 ## Model Details
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
 - **Maximum Sequence Length:** 100 tokens
-- **Output Dimensionality:** 768 dimensions
 - **Similarity Function:** Cosine Similarity
 - **Training Dataset:**
     - [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
@@ -137,7 +134,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
 ```
 SentenceTransformer(
   (0): Transformer({'max_seq_length': 100, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
-  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 )
 ```
@@ -159,20 +156,20 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("redis/langcache-embed-experimental")
 # Run inference
 sentences = [
-    'A special case is of the Country B loyalist who controls agents or provides managerial supporting or other functions against Country A .',
-    'A special case is the loyalist of Country B , who controls agents or provides management support or other functions against Country A .',
-    'Music Story is a music service website and international music data provider that curates , aggregates and analyses metadata for digital music services .',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
-# [3, 768]
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.9844, 0.5195],
-#         [0.9844, 0.9922, 0.5078],
-#         [0.5195, 0.5078, 0.9922]], dtype=torch.bfloat16)
 ```
 <!--
@@ -208,16 +205,16 @@ You can finetune this model on your own dataset.
 * Dataset: `test`
 * Evaluated with <code>ir_evaluator.CustomInformationRetrievalEvaluator</code>
-| Metric                               | Value     |
-|:-------------------------------------|:----------|
-| cosine_accuracy@1                    | 0.5861    |
-| cosine_precision@1                   | 0.5861    |
-| cosine_recall@1                      | 0.568     |
-| **cosine_ndcg@10**                   | **0.773** |
-| cosine_mrr@1                         | 0.5861    |
-| cosine_map@100                       | 0.7217    |
-| cosine_auc_precision_cache_hit_ratio | 0.3489    |
-| cosine_auc_similarity_distribution   | 0.1525    |
 <!--
 ## Bias, Risks and Limitations
@@ -238,23 +235,25 @@ You can finetune this model on your own dataset.
 #### LangCache Sentence Pairs (all)
 * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
-* Size: 72,021 training samples
-* Columns: <code>sentence_a</code>, <code>sentence_b</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | sentence_a                                                                        | sentence_b                                                                        | label                                           |
-  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
-  | type    | string                                                                            | string                                                                            | int                                             |
-  | details | <ul><li>min: 8 tokens</li><li>mean: 27.46 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 27.36 tokens</li><li>max: 52 tokens</li></ul> | <ul><li>0: ~50.30%</li><li>1: ~49.70%</li></ul> |
 * Samples:
-  | sentence_a                                                                                                                                  | sentence_b                                                                                                                                    | label          |
-  |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
-  | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code>                        | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code>                           | <code>1</code> |
-  | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> | <code>0</code> |
-  | <code>After losing his second election , he resigned as opposition leader and was replaced by Geoff Pearsall .</code>                       | <code>Max Bingham resigned as opposition leader after losing his second election , and was replaced by Geoff Pearsall .</code>                | <code>1</code> |
-* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
   ```json
   {
-      "loss_fct": "torch.nn.modules.loss.BCELoss"
   }
   ```
@@ -263,30 +262,32 @@ You can finetune this model on your own dataset.
 #### LangCache Sentence Pairs (all)
 * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
-* Size: 72,021 evaluation samples
-* Columns: <code>sentence_a</code>, <code>sentence_b</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | sentence_a                                                                        | sentence_b                                                                        | label                                           |
-  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
-  | type    | string                                                                            | string                                                                            | int                                             |
-  | details | <ul><li>min: 8 tokens</li><li>mean: 27.46 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 27.36 tokens</li><li>max: 52 tokens</li></ul> | <ul><li>0: ~50.30%</li><li>1: ~49.70%</li></ul> |
 * Samples:
-  | sentence_a                                                                                                                                  | sentence_b                                                                                                                                    | label          |
-  |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
-  | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code>                        | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code>                           | <code>1</code> |
-  | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> | <code>0</code> |
-  | <code>After losing his second election , he resigned as opposition leader and was replaced by Geoff Pearsall .</code>                       | <code>Max Bingham resigned as opposition leader after losing his second election , and was replaced by Geoff Pearsall .</code>                | <code>1</code> |
-* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
   ```json
   {
-      "loss_fct": "torch.nn.modules.loss.BCELoss"
   }
   ```
 ### Training Logs
 | Epoch | Step | test_cosine_ndcg@10 |
 |:-----:|:----:|:-------------------:|
-| -1    | -1   | 0.7730              |
 ### Framework Versions

 - retrieval
 - reranking
 - generated_from_trainer
+- dataset_size:9233417
+- loss:ArcFaceInBatchLoss
+base_model: answerdotai/ModernBERT-large
 widget:
+- source_sentence: Hayley Vaughan portrayed Ripa on the ABC daytime soap opera , ``
+    All My Children `` , between 1990 and 2002 .
   sentences:
+  - Traxxpad is a music application for Sony 's PlayStation Portable published by
+    Definitive Studios and developed by Eidos Interactive .
+  - Between 1990 and 2002 , Hayley Vaughan Ripa portrayed in the ABC soap opera ``
+    All My Children `` .
+  - Between 1990 and 2002 , Ripa Hayley portrayed Vaughan in the ABC soap opera ``
+    All My Children `` .
+- source_sentence: Olivella monilifera is a species of dwarf sea snail , small gastropod
+    mollusk in the family Olivellidae , the marine olives .
   sentences:
+  - Olivella monilifera is a species of the dwarf - sea snail , small gastropod mollusk
+    in the Olivellidae family , the marine olives .
+  - He was cut by the Browns after being signed by the Bills in 2013 . He was later
+    released .
+  - Olivella monilifera is a kind of sea snail , marine gastropod mollusk in the Olivellidae
+    family , the dwarf olives .
+- source_sentence: Hayashi said that Mackey `` is a sort of `` of the original model
+    for Tenchi .
   sentences:
+  - In the summer of 2009 , Ellick shot a documentary about Malala Yousafzai .
+  - Hayashi said that Mackey is `` sort of `` the original model for Tenchi .
+  - Mackey said that Hayashi is `` sort of `` the original model for Tenchi .
+- source_sentence: Much of the film was shot on location in Los Angeles and in nearby
+    Burbank and Glendale .
   sentences:
+  - Much of the film was shot on location in Los Angeles and in nearby Burbank and
+    Glendale .
+  - Much of the film was shot on site in Burbank and Glendale and in the nearby Los
+    Angeles .
+  - Traxxpad is a music application for the Sony PlayStation Portable developed by
+    the Definitive Studios and published by Eidos Interactive .
+- source_sentence: According to him , the earth is the carrier of his artistic work
+    , which is only integrated into the creative process by minimal changes .
   sentences:
+  - National players are Bold players .
+  - According to him , earth is the carrier of his artistic work being integrated
+    into the creative process only by minimal changes .
+  - According to him , earth is the carrier of his creative work being integrated
+    into the artistic process only by minimal changes .
 datasets:
 - redis/langcache-sentencepairs-v2
 pipeline_tag: sentence-similarity
       type: test
     metrics:
     - type: cosine_accuracy@1
+      value: 0.44081091729646477
       name: Cosine Accuracy@1
     - type: cosine_precision@1
+      value: 0.44081091729646477
       name: Cosine Precision@1
     - type: cosine_recall@1
+      value: 0.42663486382682986
       name: Cosine Recall@1
     - type: cosine_ndcg@10
+      value: 0.6274011007244752
       name: Cosine Ndcg@10
     - type: cosine_mrr@1
+      value: 0.44081091729646477
       name: Cosine Mrr@1
     - type: cosine_map@100
+      value: 0.5749605963252064
       name: Cosine Map@100
     - type: cosine_auc_precision_cache_hit_ratio
+      value: 0.27130175854619276
       name: Cosine Auc Precision Cache Hit Ratio
     - type: cosine_auc_similarity_distribution
+      value: 0.40770905754259995
       name: Cosine Auc Similarity Distribution
 ---
 # Redis fine-tuned BiEncoder model for semantic caching on LangCache
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large) on the [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2) dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for sentence pair similarity.
 ## Model Details
 ### Model Description
 - **Model Type:** Sentence Transformer
+- **Base model:** [answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large) <!-- at revision 45bb4654a4d5aaff24dd11d4781fa46d39bf8c13 -->
 - **Maximum Sequence Length:** 100 tokens
+- **Output Dimensionality:** 1024 dimensions
 - **Similarity Function:** Cosine Similarity
 - **Training Dataset:**
     - [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
 ```
 SentenceTransformer(
   (0): Transformer({'max_seq_length': 100, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
+  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 )
 ```
 model = SentenceTransformer("redis/langcache-embed-experimental")
 # Run inference
 sentences = [
+    'According to him , the earth is the carrier of his artistic work , which is only integrated into the creative process by minimal changes .',
+    'According to him , earth is the carrier of his artistic work being integrated into the creative process only by minimal changes .',
+    'According to him , earth is the carrier of his creative work being integrated into the artistic process only by minimal changes .',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
+# [3, 1024]
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.9844, 0.9844],
+#         [0.9844, 0.9961, 0.9922],
+#         [0.9844, 0.9922, 0.9961]], dtype=torch.bfloat16)
 ```
 <!--
 * Dataset: `test`
 * Evaluated with <code>ir_evaluator.CustomInformationRetrievalEvaluator</code>
+| Metric                               | Value      |
+|:-------------------------------------|:-----------|
+| cosine_accuracy@1                    | 0.4408     |
+| cosine_precision@1                   | 0.4408     |
+| cosine_recall@1                      | 0.4266     |
+| **cosine_ndcg@10**                   | **0.6274** |
+| cosine_mrr@1                         | 0.4408     |
+| cosine_map@100                       | 0.575      |
+| cosine_auc_precision_cache_hit_ratio | 0.2713     |
+| cosine_auc_similarity_distribution   | 0.4077     |
 <!--
 ## Bias, Risks and Limitations
 #### LangCache Sentence Pairs (all)
 * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
+* Size: 126,938 training samples
+* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | anchor                                                                            | positive                                                                          | negative                                                                          |
+  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
+  | type    | string                                                                            | string                                                                            | string                                                                            |
+  | details | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 48 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 26.54 tokens</li><li>max: 61 tokens</li></ul> |
 * Samples:
+  | anchor                                                                                                                                      | positive                                                                                                                                      | negative                                                                                                                                      |
+  |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code>                        | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code>                           | <code>how can I get financial freedom as soon as possible?</code>                                                                             |
+  | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code>                         | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code>                          | <code>The older Punts are still very much in existence today and race in the same fleets as the newer boats .</code>                          |
+  | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley , , was located at Turner Valley Bar N Ranch Airport , southwest of Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> |
+* Loss: <code>losses.ArcFaceInBatchLoss</code> with these parameters:
   ```json
   {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "gather_across_devices": false
   }
   ```
 #### LangCache Sentence Pairs (all)
 * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v2)
+* Size: 126,938 evaluation samples
+* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | anchor                                                                            | positive                                                                          | negative                                                                          |
+  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
+  | type    | string                                                                            | string                                                                            | string                                                                            |
+  | details | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 27.27 tokens</li><li>max: 48 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 26.54 tokens</li><li>max: 61 tokens</li></ul> |
 * Samples:
+  | anchor                                                                                                                                      | positive                                                                                                                                      | negative                                                                                                                                      |
+  |:--------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code>                        | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code>                           | <code>how can I get financial freedom as soon as possible?</code>                                                                             |
+  | <code>The newer punts are still very much in existence today and run in the same fleets as the older boats .</code>                         | <code>The newer Punts are still very much in existence today and race in the same fleets as the older boats .</code>                          | <code>The older Punts are still very much in existence today and race in the same fleets as the newer boats .</code>                          |
+  | <code>Turner Valley , was at the Turner Valley Bar N Ranch Airport , southwest of the Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley , , was located at Turner Valley Bar N Ranch Airport , southwest of Turner Valley Bar N Ranch , Alberta , Canada .</code> | <code>Turner Valley Bar N Ranch Airport , , was located at Turner Valley Bar N Ranch , southwest of Turner Valley , Alberta , Canada .</code> |
+* Loss: <code>losses.ArcFaceInBatchLoss</code> with these parameters:
   ```json
   {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "gather_across_devices": false
   }
   ```
 ### Training Logs
 | Epoch | Step | test_cosine_ndcg@10 |
 |:-----:|:----:|:-------------------:|
+| -1    | -1   | 0.6274              |
 ### Framework Versions

config.json CHANGED Viewed

@@ -19,10 +19,10 @@
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
-  "hidden_size": 768,
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
-  "intermediate_size": 1152,
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "local_rope_theta": 10000.0,
@@ -32,8 +32,8 @@
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
-  "num_attention_heads": 12,
-  "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "repad_logits_with_grad": false,

   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
+  "hidden_size": 1024,
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
+  "intermediate_size": 2624,
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "local_rope_theta": 10000.0,
   "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "repad_logits_with_grad": false,

config_sentence_transformers.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "__version__": {
     "sentence_transformers": "5.1.0",
     "transformers": "4.56.0",
@@ -9,6 +10,5 @@
     "document": ""
   },
   "default_prompt_name": null,
-  "similarity_fn_name": "cosine",
-  "model_type": "SentenceTransformer"
 }

 {
+  "model_type": "SentenceTransformer",
   "__version__": {
     "sentence_transformers": "5.1.0",
     "transformers": "4.56.0",
     "document": ""
   },
   "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95d02211c4cca89113f9f3e93ed91f5176bf50170faa2cb835f7bfea15bb9dd2
-size 298041696

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f84ab85561ef1687cbe92711a697e05cba61d13ec58fc3ec279bd64147715b8
+size 789580328

tokenizer_config.json CHANGED Viewed

@@ -938,7 +938,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",

     "input_ids",
     "attention_mask"
   ],
+  "model_max_length": 8192,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",