Add new SentenceTransformer model

- 1_Pooling/config.json +10 -0
- README.md +448 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 384,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
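This config selects mean pooling: the sentence embedding is the average of the 384-dimensional token embeddings over non-padding positions. As an illustration (not part of the commit; function name and shapes are assumptions), a minimal sketch of that operation in PyTorch:

```python
import torch

def mean_pooling(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # token_embeddings: (batch, seq_len, 384); attention_mask: (batch, seq_len)
    mask = attention_mask.unsqueeze(-1).float()        # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)      # sum over real (unmasked) tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)           # token count per row, avoid div-by-zero
    return summed / counts                             # (batch, 384)
```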
README.md
ADDED
@@ -0,0 +1,448 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:100000
- loss:MultipleNegativesRankingLoss
base_model: YujinPang/docemb_M3_1
widget:
- source_sentence: 'Course structure

    Mechatronics students take courses in various fields:'
  sentences:
  - Robotics is one of the newest emerging subfield of mechatronics. It is the study of robots that how they are manufactured and operated. Since 2000, this branch of mechatronics is attracting a number of aspirants. Robotics is interrelated with automation because here also not much human intervention is required. A large number of factories especially in automobile factories, robots are founds in assembly lines where they perform the job of drilling, installation and fitting. Programming skills are necessary for specialization in robotics. Knowledge of programming language —ROBOTC is important for functioning robots. An industrial robot is a prime example of a mechatronics system; it includes aspects of electronics, mechanics, and computing to do its day-to-day jobs.
  - "Melting and boiling points \nMelting and boiling points, typically expressed in degrees Celsius at a pressure of one atmosphere, are commonly used in characterizing the various elements. While known for most elements, either or both of these measurements is still undetermined for some of the radioactive elements available in only tiny quantities. Since helium remains a liquid even at absolute zero at atmospheric pressure, it has only a boiling point, and not a melting point, in conventional presentations."
  - Capsicum chili peppers are commonly used to add pungency in cuisines worldwide. The range of pepper heat reflected by a Scoville score is from 500 or less (sweet peppers) to over 2.6 million (Pepper X) (table below; Scoville scales for individual chili peppers are in the respective linked article). Some peppers such as the Guntur chilli and Rocoto are excluded from the list due to their very wide SHU range. Others such as Dragon's Breath and Chocolate 7-pot have not been officially verified.
- source_sentence: In contrast to the South Pole neutrino telescopes AMANDA and IceCube, ANTARES uses water instead of ice as its Cherenkov medium. As light in water is less scattered than in ice this results in a better resolving power. On the other hand, water contains more sources of background light than ice (radioactive isotopes potassium-40 in the sea salt and bioluminescent organisms), leading to a higher energy thresholds for ANTARES with respect to IceCube and making more sophisticated background-suppression methods necessary.
  sentences:
  - Deployment and connection of the detector are performed in cooperation with the French oceanographic institute, IFREMER, currently using the ROV Victor, and for some past operations the submarine Nautile.
  - To distinguish the other types of multithreading from SMT, the term "temporal multithreading" is used to denote when instructions from only one thread can be issued at a time.
  - The two most important classes of divergences are the f-divergences and Bregman divergences; however, other types of divergence functions are also encountered in the literature. The only divergence that is both an f-divergence and a Bregman divergence is the Kullback–Leibler divergence; the squared Euclidean divergence is a Bregman divergence (corresponding to the function ) but not an f-divergence.
- source_sentence: The term "hyperbolic geometry" was introduced by Felix Klein in 1871. Klein followed an initiative of Arthur Cayley to use the transformations of projective geometry to produce isometries. The idea used a conic section or quadric to define a region, and used cross ratio to define a metric. The projective transformations that leave the conic section or quadric stable are the isometries. "Klein showed that if the Cayley absolute is a real curve then the part of the projective plane in its interior is isometric to the hyperbolic plane..."
  sentences:
  - The mathematics is not difficult but is intertwined so the following is only a brief sketch. Starting with a non-symmetric tensor , the Lagrangian density is split into
  - 'Because Euclidean, hyperbolic and elliptic geometry are all consistent, the question arises: which is the real geometry of space, and if it is hyperbolic or elliptic, what is its curvature?'
  - Wind farm waste is less toxic than other garbage. Wind turbine blades represent only a fraction of overall waste in the US, according to the Wind-industry trade association, American Wind Energy Association.
- source_sentence: 'The StyleGAN-2-ADA paper points out a further point on data augmentation: it must be invertible. Continue with the example of generating ImageNet pictures. If the data augmentation is "randomly rotate the picture by 0, 90, 180, 270 degrees with equal probability", then there is no way for the generator to know which is the true orientation: Consider two generators , such that for any latent , the generated image is a 90-degree rotation of . They would have exactly the same expected loss, and so neither is preferred over the other.'
  sentences:
  - The key method to distinguish between these different models involves study of the particles' interactions ("coupling") and exact decay processes ("branching ratios"), which can be measured and tested experimentally in particle collisions. In the Type-I 2HDM model one Higgs doublet couples to up and down quarks, while the second doublet does not couple to quarks. This model has two interesting limits, in which the lightest Higgs couples to just fermions ("gauge-phobic") or just gauge bosons ("fermiophobic"), but not both. In the Type-II 2HDM model, one Higgs doublet only couples to up-type quarks, the other only couples to down-type quarks. The heavily researched Minimal Supersymmetric Standard Model (MSSM) includes a Type-II 2HDM Higgs sector, so it could be disproven by evidence of a Type-I 2HDM Higgs.
  - "Model variants \nSeveral different model variants of the S4 are sold, with most variants varying mainly in handling regional network types and bands. To prevent grey market reselling, models of the S4 manufactured after July 2013 implement a regional lockout system in certain regions, requiring that the first SIM card used on a European and North American model be from a carrier in that region. Samsung stated that the lock would be removed once a local SIM card is used. SIM format for all variants is Micro-SIM, which can have one or two depending on model."
  - Another inspiration for GANs was noise-contrastive estimation, which uses the same loss function as GANs and which Goodfellow studied during his PhD in 2010–2014.
- source_sentence: The final step for the BoW model is to convert vector-represented patches to "codewords" (analogous to words in text documents), which also produces a "codebook" (analogy to a word dictionary). A codeword can be considered as a representative of several similar patches. One simple method is performing k-means clustering over all the vectors. Codewords are then defined as the centers of the learned clusters. The number of the clusters is the codebook size (analogous to the size of the word dictionary).
  sentences:
  - Pathria retired from the University of Waterloo in August 1998 and, soon thereafter, moved to the west coast of the US and became an adjunct professor of physics at the University of California at San Diego – a position he continued to hold till 2010. In 2009, Pathria's newest publishers (Elsevier/Academic) prevailed upon him to produce a third edition of this book. He now sought the help of Paul Beale, of the University of Colorado at Boulder, whose co-authorship resulted in another brand new edition in March 2011. Ten years later, in 2021, Pathria and Beale produced a fourth edition of this book.
  - 'C++

    In the 1970s, software engineers needed language support to break large projects down into modules. One obvious feature was to decompose large projects physically into separate files. A less obvious feature was to decompose large projects logically into abstract datatypes. At the time, languages supported concrete (scalar) datatypes like integer numbers, floating-point numbers, and strings of characters. Abstract datatypes are structures of concrete datatypes, with a new name assigned. For example, a list of integers could be called integer_list.'
  - "External links\n Bag of Visual Words in a Nutshell a short tutorial by Bethea Davida. A demo for two bag-of-words classifiers by L. Fei-Fei, R. Fergus, and A. Torralba. Caltech Large Scale Image Search Toolbox: a Matlab/C++ toolbox implementing Inverted File search for Bag of Words model. It also contains implementations for fast approximate nearest neighbor search using randomized k-d tree, locality-sensitive hashing, and hierarchical k-means. DBoW2 library: a library that implements a fast bag of words in C++ with support for OpenCV."
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on YujinPang/docemb_M3_1

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [YujinPang/docemb_M3_1](https://huggingface.co/YujinPang/docemb_M3_1). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [YujinPang/docemb_M3_1](https://huggingface.co/YujinPang/docemb_M3_1) <!-- at revision 258eb8caf51c50eb52e628dd96c8d818f0aaf078 -->
- **Maximum Sequence Length:** 256 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.

```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("YujinPang/docemb_M3_1_9")
# Run inference
sentences = [
    'The final step for the BoW model is to convert vector-represented patches to "codewords" (analogous to words in text documents), which also produces a "codebook" (analogy to a word dictionary). A codeword can be considered as a representative of several similar patches. One simple method is performing k-means clustering over all the vectors. Codewords are then defined as the centers of the learned clusters. The number of the clusters is the codebook size (analogous to the size of the word dictionary).',
    'External links\n Bag of Visual Words in a Nutshell a short tutorial by Bethea Davida. A demo for two bag-of-words classifiers by L. Fei-Fei, R. Fergus, and A. Torralba. Caltech Large Scale Image Search Toolbox: a Matlab/C++ toolbox implementing Inverted File search for Bag of Words model. It also contains implementations for fast approximate nearest neighbor search using randomized k-d tree, locality-sensitive hashing, and hierarchical k-means. DBoW2 library: a library that implements a fast bag of words in C++ with support for OpenCV.',
    'C++\nIn the 1970s, software engineers needed language support to break large projects down into modules. One obvious feature was to decompose large projects physically into separate files. A less obvious feature was to decompose large projects logically into abstract datatypes. At the time, languages supported concrete (scalar) datatypes like integer numbers, floating-point numbers, and strings of characters. Abstract datatypes are structures of concrete datatypes, with a new name assigned. For example, a list of integers could be called integer_list.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
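The same API also supports simple semantic search. The following sketch is an added illustration (not part of the generated card; the corpus and query strings are placeholders): encode a query and a corpus, then rank by cosine similarity.

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("YujinPang/docemb_M3_1_9")

corpus = [
    "Mean pooling averages token embeddings into one sentence vector.",
    "The Scoville scale measures the pungency of chili peppers.",
    "ANTARES is a neutrino telescope that uses sea water as its Cherenkov medium.",
]
query = "How is the heat of a chili pepper quantified?"

corpus_embeddings = model.encode(corpus)
query_embedding = model.encode([query])
scores = model.similarity(query_embedding, corpus_embeddings)  # shape [1, 3]
best = scores.argmax().item()                                  # index of best match
print(corpus[best])
```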

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 100,000 training samples
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 |
  |:--------|:-----------|:-----------|
  | type    | string     | string     |
  | details | <ul><li>min: 10 tokens</li><li>mean: 96.25 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 93.51 tokens</li><li>max: 256 tokens</li></ul> |
* Samples:
  | sentence_0 | sentence_1 |
  |:-----------|:-----------|
  | <code>The character has been portrayed by Silas Carson in Episodes I-III, and voiced by Tom Kenny in The Clone Wars.</code> | <code>The character has been voiced by Dee Bradley Baker in The Clone Wars and The Bad Batch.</code> |
  | <code>Abdomen <br>The muscles of the abdominal wall are subdivided into a superficial and a deep group.</code> | <code>The muscles of the hip are divided into a dorsal and a ventral group.</code> |
  | <code>Resonant frequency<br>When placed in a magnetic field, NMR active nuclei (such as 1H or 13C) absorb electromagnetic radiation at a frequency characteristic of the isotope. The resonant frequency, energy of the radiation absorbed, and the intensity of the signal are proportional to the strength of the magnetic field. For example, in a 21 Tesla magnetic field, hydrogen nuclei (commonly referred to as protons) resonate at 900 MHz. It is common to refer to a 21 T magnet as a 900 MHz magnet since hydrogen is the most common nucleus detected. However, different nuclei will resonate at different frequencies at this field strength in proportion to their nuclear magnetic moments.</code> | <code>Spectral interpretation<br>NMR signals are ordinarily characterized by three variables: chemical shift, spin-spin coupling, and relaxation time.</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
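For intuition (an added illustration, not part of the generated card): with this loss, each `(sentence_0, sentence_1)` pair in a batch is a positive, every other `sentence_1` in the batch serves as an in-batch negative, and cosine similarities scaled by 20 feed a cross-entropy over the batch. A minimal PyTorch sketch of that computation, with assumed function and argument names:

```python
import torch
import torch.nn.functional as F

def multiple_negatives_ranking_loss(anchors, positives, scale=20.0):
    # anchors, positives: (batch, dim); positives[i] is the positive for anchors[i],
    # and every positives[j] with j != i acts as an in-batch negative.
    a = F.normalize(anchors, dim=-1)
    p = F.normalize(positives, dim=-1)
    scores = scale * (a @ p.T)                               # scaled cosine similarities
    labels = torch.arange(scores.size(0), device=scores.device)
    return F.cross_entropy(scores, labels)                   # diagonal entries are the targets
```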

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 256
- `per_device_eval_batch_size`: 256
- `num_train_epochs`: 1
- `multi_dataset_batch_sampler`: round_robin

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 256
- `per_device_eval_batch_size`: 256
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

### Framework Versions
- Python: 3.10.11
- Sentence Transformers: 4.1.0
- Transformers: 4.52.3
- PyTorch: 2.7.0+cu126
- Accelerate: 1.7.0
- Datasets: 3.6.0
- Tokenizers: 0.21.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,25 @@
{
    "architectures": [
        "BertModel"
    ],
    "attention_probs_dropout_prob": 0.1,
    "classifier_dropout": null,
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 384,
    "initializer_range": 0.02,
    "intermediate_size": 1536,
    "layer_norm_eps": 1e-12,
    "max_position_embeddings": 512,
    "model_type": "bert",
    "num_attention_heads": 12,
    "num_hidden_layers": 6,
    "pad_token_id": 0,
    "position_embedding_type": "absolute",
    "torch_dtype": "float32",
    "transformers_version": "4.52.3",
    "type_vocab_size": 2,
    "use_cache": true,
    "vocab_size": 30522
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
    "__version__": {
        "sentence_transformers": "4.1.0",
        "transformers": "4.52.3",
        "pytorch": "2.7.0+cu126"
    },
    "prompts": {},
    "default_prompt_name": null,
    "similarity_fn_name": "cosine"
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:533e7338d62f28472477172d303aad9b8b84c399f5797b7c77f319b5804315e0
size 90864192
modules.json
ADDED
@@ -0,0 +1,20 @@
[
    {
        "idx": 0,
        "name": "0",
        "path": "",
        "type": "sentence_transformers.models.Transformer"
    },
    {
        "idx": 1,
        "name": "1",
        "path": "1_Pooling",
        "type": "sentence_transformers.models.Pooling"
    },
    {
        "idx": 2,
        "name": "2",
        "path": "2_Normalize",
        "type": "sentence_transformers.models.Normalize"
    }
]
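modules.json wires the three stages in order: the BERT Transformer, the mean Pooling configured in 1_Pooling, and an L2 Normalize step (which is why cosine similarity reduces to a dot product). As a rough illustration (not part of the commit), an equivalent pipeline in plain transformers; the repo id follows the usage section above and the shapes follow the configs in this commit:

```python
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel

repo = "YujinPang/docemb_M3_1_9"
tokenizer = AutoTokenizer.from_pretrained(repo)
bert = AutoModel.from_pretrained(repo)

batch = tokenizer(["An example sentence."], padding=True, truncation=True,
                  max_length=256, return_tensors="pt")
with torch.no_grad():
    hidden = bert(**batch).last_hidden_state                      # module 0: Transformer, (1, seq, 384)
mask = batch["attention_mask"].unsqueeze(-1).float()
embedding = (hidden * mask).sum(1) / mask.sum(1).clamp(min=1e-9)  # module 1: mean Pooling
embedding = F.normalize(embedding, p=2, dim=1)                    # module 2: Normalize to unit length
```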
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
    "max_seq_length": 256,
    "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
    "cls_token": {
        "content": "[CLS]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "mask_token": {
        "content": "[MASK]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "pad_token": {
        "content": "[PAD]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "sep_token": {
        "content": "[SEP]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "unk_token": {
        "content": "[UNK]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
{
    "added_tokens_decoder": {
        "0": {
            "content": "[PAD]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "100": {
            "content": "[UNK]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "101": {
            "content": "[CLS]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "102": {
            "content": "[SEP]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "103": {
            "content": "[MASK]",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        }
    },
    "clean_up_tokenization_spaces": false,
    "cls_token": "[CLS]",
    "do_basic_tokenize": true,
    "do_lower_case": true,
    "extra_special_tokens": {},
    "mask_token": "[MASK]",
    "max_length": 128,
    "model_max_length": 256,
    "never_split": null,
    "pad_to_multiple_of": null,
    "pad_token": "[PAD]",
    "pad_token_type_id": 0,
    "padding_side": "right",
    "sep_token": "[SEP]",
    "stride": 0,
    "strip_accents": null,
    "tokenize_chinese_chars": true,
    "tokenizer_class": "BertTokenizer",
    "truncation_side": "right",
    "truncation_strategy": "longest_first",
    "unk_token": "[UNK]"
}
vocab.txt
ADDED
The diff for this file is too large to render.