Update README.md
Browse files
README.md
CHANGED
|
@@ -3,498 +3,150 @@ tags:
|
|
| 3 |
- sentence-transformers
|
| 4 |
- sentence-similarity
|
| 5 |
- feature-extraction
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
base_model: Shuu12121/CodeModernBERT-Snake
|
| 10 |
-
widget:
|
| 11 |
-
- source_sentence: // MiddlewareHeaders adds headers to a handler
|
| 12 |
-
sentences:
|
| 13 |
-
- "function create(options) {\n console.log('Creating app: ', options.name);\n\n\
|
| 14 |
-
\ if (!options.status)\n options.status = (code, msg) => {\n console.log(msg);\n\
|
| 15 |
-
\ };\n if (!options.type) options.type = 'rekit-react';\n\n const prjDir\
|
| 16 |
-
\ = path.join(options.location || process.cwd(), options.name);\n return new\
|
| 17 |
-
\ Promise(async (resolve, reject) => {\n try {\n if (fs.existsSync(prjDir))\
|
| 18 |
-
\ {\n reject('FOLDER_EXISTS');\n return;\n }\n fs.mkdirSync(prjDir);\n\
|
| 19 |
-
\ let gitRepo;\n if (options.source) {\n if (/^https?:/.test(options.source))\
|
| 20 |
-
\ {\n // It's a git repo\n gitRepo = options.source;\n \
|
| 21 |
-
\ } else {\n // It's a local folder\n const srcDir = path.isAbsolute(options.source)\n\
|
| 22 |
-
\ ? options.source\n : path.join(process.cwd(), options.source);\n\
|
| 23 |
-
\ options.status('CREATE_APP_COPY_FILES', `Copy files from ${srcDir}...`);\n\
|
| 24 |
-
\ await fs.copy(srcDir, prjDir, {\n filter: src => !/\\/(\\\
|
| 25 |
-
.git|node_modules\\/|node_modules$)/.test(src) || path.basename(src) === '.gitignore',\n\
|
| 26 |
-
\ });\n }\n } else if (options.type) {\n // Get gitRepo\n\
|
| 27 |
-
\ options.status(\n 'QUERY_APP_TYPES_GIT_REPO',\n `Looking\
|
| 28 |
-
\ for the git repo for app type ${options.type}...`,\n );\n const\
|
| 29 |
-
\ appTypes = await getAppTypes();\n const appType = _.find(appTypes, {\
|
| 30 |
-
\ id: options.type });\n if (!appType) reject('APP_TYPE_NOT_SUPPORTED');\n\
|
| 31 |
-
\ gitRepo = appType.repo;\n } else {\n await fs.remove(prjDir);\n\
|
| 32 |
-
\ reject('NO_SOURCE_OR_APP_TYPE');\n }\n\n if (gitRepo) {\n \
|
| 33 |
-
\ options.status('CLONE_PROJECT', `Downloading project from ${gitRepo}...`);\n\
|
| 34 |
-
\ await cloneRepo(gitRepo, prjDir);\n }\n\n postCreate(prjDir,\
|
| 35 |
-
\ options);\n options.status('CREATION_SUCCESS', '\U0001F603App creation\
|
| 36 |
-
\ success.');\n resolve();\n } catch (err) {\n console.log('Failed\
|
| 37 |
-
\ to create project.');\n fs.removeSync(prjDir);\n reject(err);\n \
|
| 38 |
-
\ }\n });\n}"
|
| 39 |
-
- "@Override\n\tpublic void setFrameworkID(Option<Protos.FrameworkID> frameworkID)\
|
| 40 |
-
\ throws Exception {\n\t\tsynchronized (startStopLock) {\n\t\t\tverifyIsRunning();\n\
|
| 41 |
-
\n\t\t\tbyte[] value = frameworkID.isDefined() ? frameworkID.get().getValue().getBytes(ConfigConstants.DEFAULT_CHARSET)\
|
| 42 |
-
\ :\n\t\t\t\tnew byte[0];\n\t\t\tframeworkIdInZooKeeper.setValue(value);\n\t\t\
|
| 43 |
-
}\n\t}"
|
| 44 |
-
- "func MiddlewareHeaders(vs map[string]string) Middleware {\n\treturn func(h http.Handler)\
|
| 45 |
-
\ http.Handler {\n\t\treturn http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request)\
|
| 46 |
-
\ {\n\t\t\t// Add headers\n\t\t\thandleHeaders(vs, rw)\n\n\t\t\t// Next handler\n\
|
| 47 |
-
\t\t\th.ServeHTTP(rw, r)\n\t\t})\n\t}\n}"
|
| 48 |
-
- source_sentence: 'Parses a Plist XML string. Returns an Object.
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
@param {String} xml - the XML String to decode
|
| 52 |
-
|
| 53 |
-
@param {Function} callback - callback function
|
| 54 |
-
|
| 55 |
-
@returns {Mixed} the decoded value from the Plist XML
|
| 56 |
-
|
| 57 |
-
@api public
|
| 58 |
-
|
| 59 |
-
@deprecated use parse() instead'
|
| 60 |
-
sentences:
|
| 61 |
-
- "function parseStringSync (xml) {\n var doc = new DOMParser().parseFromString(xml);\n\
|
| 62 |
-
\ var plist;\n if (doc.documentElement.nodeName !== 'plist') {\n throw new\
|
| 63 |
-
\ Error('malformed document. First element should be <plist>');\n }\n plist\
|
| 64 |
-
\ = parsePlistXML(doc.documentElement);\n\n // if the plist is an array with\
|
| 65 |
-
\ 1 element, pull it out of the array\n if (plist.length == 1) {\n plist =\
|
| 66 |
-
\ plist[0];\n }\n return plist;\n}"
|
| 67 |
-
- "func GetCallStringArgsValues(n ast.Node, ctx *Context) []string {\n\tvalues :=\
|
| 68 |
-
\ []string{}\n\tswitch node := n.(type) {\n\tcase *ast.CallExpr:\n\t\tfor _, arg\
|
| 69 |
-
\ := range node.Args {\n\t\t\tswitch param := arg.(type) {\n\t\t\tcase *ast.BasicLit:\n\
|
| 70 |
-
\t\t\t\tvalue, err := GetString(param)\n\t\t\t\tif err == nil {\n\t\t\t\t\tvalues\
|
| 71 |
-
\ = append(values, value)\n\t\t\t\t}\n\t\t\tcase *ast.Ident:\n\t\t\t\tvalues =\
|
| 72 |
-
\ append(values, GetIdentStringValues(param)...)\n\t\t\t}\n\t\t}\n\t}\n\treturn\
|
| 73 |
-
\ values\n}"
|
| 74 |
-
- "public static Date beginOfYear(@NotNull final Date date) {\n\t\treturn DateUtils.truncate(date,\
|
| 75 |
-
\ Calendar.YEAR);\n\t}"
|
| 76 |
-
- source_sentence: '// forbiddenImportsFor determines all of the forbidden
|
| 77 |
-
|
| 78 |
-
// imports for a package given the import restrictions
|
| 79 |
-
|
| 80 |
-
// and returns a deduplicated list of them'
|
| 81 |
-
sentences:
|
| 82 |
-
- "func (i *ImportRestriction) forbiddenImportsFor(pkg Package) []string {\n\tforbiddenImportSet\
|
| 83 |
-
\ := map[string]struct{}{}\n\timports := pkg.Imports\n\tif !i.ExcludeTests {\n\
|
| 84 |
-
\t\timports = append(imports, append(pkg.TestImports, pkg.XTestImports...)...)\n\
|
| 85 |
-
\t}\n\tfor _, imp := range imports {\n\t\tpath := extractVendorPath(imp)\n\t\t\
|
| 86 |
-
if i.isForbidden(path) {\n\t\t\tforbiddenImportSet[path] = struct{}{}\n\t\t}\n\
|
| 87 |
-
\t}\n\n\tvar forbiddenImports []string\n\tfor imp := range forbiddenImportSet\
|
| 88 |
-
\ {\n\t\tforbiddenImports = append(forbiddenImports, imp)\n\t}\n\treturn forbiddenImports\n\
|
| 89 |
-
}"
|
| 90 |
-
- "function pick(o, props = []) {\n return props.reduce((acc, k) => {\n \
|
| 91 |
-
\ if (o.hasOwnProperty(k)) {\n acc[k] = o[k];\n }\n\n \
|
| 92 |
-
\ return acc;\n }, {});\n}"
|
| 93 |
-
- "func (s *PutTraceSegmentsOutput) SetUnprocessedTraceSegments(v []*UnprocessedTraceSegment)\
|
| 94 |
-
\ *PutTraceSegmentsOutput {\n\ts.UnprocessedTraceSegments = v\n\treturn s\n}"
|
| 95 |
-
- source_sentence: 'Validates whether the specified template is syntactically correct
|
| 96 |
-
and will be accepted by Azure Resource Manager..
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
@param resourceGroupName The name of the resource group the template will be deployed
|
| 100 |
-
to. The name is case insensitive.
|
| 101 |
-
|
| 102 |
-
@param deploymentName The name of the deployment.
|
| 103 |
-
|
| 104 |
-
@param properties The deployment properties.
|
| 105 |
-
|
| 106 |
-
@param serviceCallback the async ServiceCallback to handle successful and failed
|
| 107 |
-
responses.
|
| 108 |
-
|
| 109 |
-
@throws IllegalArgumentException thrown if parameters fail the validation
|
| 110 |
-
|
| 111 |
-
@return the {@link ServiceFuture} object'
|
| 112 |
-
sentences:
|
| 113 |
-
- "func Execute(v string) {\n\tversion = v\n\tif err := rootCmd.Execute(); err !=\
|
| 114 |
-
\ nil {\n\t\tlog.Fatal(err)\n\t}\n}"
|
| 115 |
-
- "function( otherPath )\r\n\t{\r\n\t\tvar thisElements = this.elements;\r\n\t\t\
|
| 116 |
-
var otherElements = otherPath && otherPath.elements;\r\n\r\n\t\tif ( !otherElements\
|
| 117 |
-
\ || thisElements.length != otherElements.length )\r\n\t\t\treturn false;\r\n\r\
|
| 118 |
-
\n\t\tfor ( var i = 0 ; i < thisElements.length ; i++ )\r\n\t\t{\r\n\t\t\tif (\
|
| 119 |
-
\ !thisElements[ i ].equals( otherElements[ i ] ) )\r\n\t\t\t\treturn false;\r\
|
| 120 |
-
\n\t\t}\r\n\r\n\t\treturn true;\r\n\t}"
|
| 121 |
-
- "public ServiceFuture<DeploymentValidateResultInner> validateAsync(String resourceGroupName,\
|
| 122 |
-
\ String deploymentName, DeploymentProperties properties, final ServiceCallback<DeploymentValidateResultInner>\
|
| 123 |
-
\ serviceCallback) {\n return ServiceFuture.fromResponse(validateWithServiceResponseAsync(resourceGroupName,\
|
| 124 |
-
\ deploymentName, properties), serviceCallback);\n }"
|
| 125 |
-
- source_sentence: This method calculates the turn weight separately.
|
| 126 |
-
sentences:
|
| 127 |
-
- "private SingleType parseSingleType() throws TTXPathException {\n\n final\
|
| 128 |
-
\ String atomicType = parseAtomicType();\n final boolean intero = is(TokenType.INTERROGATION,\
|
| 129 |
-
\ true);\n return new SingleType(atomicType, intero);\n }"
|
| 130 |
-
- "public void putAllWriteable(BeanMap<T> map) {\n map.types.keySet().stream().filter(key\
|
| 131 |
-
\ -> getWriteInvoker(key) != null).forEach(key -> this.put(key, map.get(key)));\n\
|
| 132 |
-
\ }"
|
| 133 |
-
- "public double calcTurnWeight(int edgeFrom, int nodeVia, int edgeTo) {\n \
|
| 134 |
-
\ long turnFlags = turnCostExt.getTurnCostFlags(edgeFrom, nodeVia, edgeTo);\n\
|
| 135 |
-
\ if (turnCostEncoder.isTurnRestricted(turnFlags))\n return\
|
| 136 |
-
\ Double.POSITIVE_INFINITY;\n\n return turnCostEncoder.getTurnCost(turnFlags);\n\
|
| 137 |
-
\ }"
|
| 138 |
pipeline_tag: sentence-similarity
|
| 139 |
library_name: sentence-transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
---
|
| 141 |
|
| 142 |
-
#
|
| 143 |
|
| 144 |
-
|
| 145 |
|
| 146 |
-
|
| 147 |
|
| 148 |
-
|
| 149 |
-
-
|
| 150 |
-
-
|
| 151 |
-
- **Maximum Sequence Length:** 1024 tokens
|
| 152 |
-
- **Output Dimensionality:** 512 dimensions
|
| 153 |
-
- **Similarity Function:** Cosine Similarity
|
| 154 |
-
<!-- - **Training Dataset:** Unknown -->
|
| 155 |
-
<!-- - **Language:** Unknown -->
|
| 156 |
-
<!-- - **License:** Unknown -->
|
| 157 |
|
| 158 |
-
|
| 159 |
|
| 160 |
-
|
| 161 |
-
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 162 |
-
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 163 |
|
| 164 |
-
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
|
|
|
| 174 |
|
| 175 |
-
|
| 176 |
|
| 177 |
-
|
| 178 |
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
-
Then you can load this model and run inference.
|
| 184 |
```python
|
| 185 |
from sentence_transformers import SentenceTransformer
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
'public double calcTurnWeight(int edgeFrom, int nodeVia, int edgeTo) {\n long turnFlags = turnCostExt.getTurnCostFlags(edgeFrom, nodeVia, edgeTo);\n if (turnCostEncoder.isTurnRestricted(turnFlags))\n return Double.POSITIVE_INFINITY;\n\n return turnCostEncoder.getTurnCost(turnFlags);\n }',
|
| 193 |
-
'public void putAllWriteable(BeanMap<T> map) {\n map.types.keySet().stream().filter(key -> getWriteInvoker(key) != null).forEach(key -> this.put(key, map.get(key)));\n }',
|
| 194 |
]
|
| 195 |
-
embeddings = model.encode(sentences)
|
| 196 |
-
print(embeddings.shape)
|
| 197 |
-
# [3, 512]
|
| 198 |
-
|
| 199 |
-
# Get the similarity scores for the embeddings
|
| 200 |
-
similarities = model.similarity(embeddings, embeddings)
|
| 201 |
-
print(similarities.shape)
|
| 202 |
-
# [3, 3]
|
| 203 |
-
```
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
</details>
|
| 211 |
-
-->
|
| 212 |
-
|
| 213 |
-
<!--
|
| 214 |
-
### Downstream Usage (Sentence Transformers)
|
| 215 |
-
|
| 216 |
-
You can finetune this model on your own dataset.
|
| 217 |
-
|
| 218 |
-
<details><summary>Click to expand</summary>
|
| 219 |
-
|
| 220 |
-
</details>
|
| 221 |
-
-->
|
| 222 |
-
|
| 223 |
-
<!--
|
| 224 |
-
### Out-of-Scope Use
|
| 225 |
-
|
| 226 |
-
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 227 |
-
-->
|
| 228 |
-
|
| 229 |
-
<!--
|
| 230 |
-
## Bias, Risks and Limitations
|
| 231 |
-
|
| 232 |
-
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 233 |
-
-->
|
| 234 |
-
|
| 235 |
-
<!--
|
| 236 |
-
### Recommendations
|
| 237 |
-
|
| 238 |
-
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 239 |
-
-->
|
| 240 |
-
|
| 241 |
-
## Training Details
|
| 242 |
-
|
| 243 |
-
### Training Dataset
|
| 244 |
-
|
| 245 |
-
#### Unnamed Dataset
|
| 246 |
-
|
| 247 |
-
* Size: 1,761,750 training samples
|
| 248 |
-
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
|
| 249 |
-
* Approximate statistics based on the first 1000 samples:
|
| 250 |
-
| | sentence_0 | sentence_1 | label |
|
| 251 |
-
|:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------|
|
| 252 |
-
| type | string | string | float |
|
| 253 |
-
| details | <ul><li>min: 3 tokens</li><li>mean: 47.87 tokens</li><li>max: 633 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 164.44 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
|
| 254 |
-
* Samples:
|
| 255 |
-
| sentence_0 | sentence_1 | label |
|
| 256 |
-
|:-----------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
|
| 257 |
-
| <code>// Read reads from serial port.<br>// It is blocked until data received or timeout after p.timeout.</code> | <code>func (p *port) Read(b []byte) (n int, err error) {<br> var done uint32<br> if err = syscall.ReadFile(p.handle, b, &done, nil); err != nil {<br> return<br> }<br> if done == 0 {<br> err = ErrTimeout<br> return<br> }<br> n = int(done)<br> return<br>}</code> | <code>1.0</code> |
|
| 258 |
-
| <code>// _NET_WM_STRUT_PARTIAL set</code> | <code>func WmStrutPartialSet(xu *xgbutil.XUtil, win xproto.Window,<br> struts *WmStrutPartial) error {<br><br> rawStruts := make([]uint, 12)<br> rawStruts[0] = struts.Left<br> rawStruts[1] = struts.Right<br> rawStruts[2] = struts.Top<br> rawStruts[3] = struts.Bottom<br> rawStruts[4] = struts.LeftStartY<br> rawStruts[5] = struts.LeftEndY<br> rawStruts[6] = struts.RightStartY<br> rawStruts[7] = struts.RightEndY<br> rawStruts[8] = struts.TopStartX<br> rawStruts[9] = struts.TopEndX<br> rawStruts[10] = struts.BottomStartX<br> rawStruts[11] = struts.BottomEndX<br><br> return xprop.ChangeProp32(xu, win, "_NET_WM_STRUT_PARTIAL", "CARDINAL",<br> rawStruts...)<br>}</code> | <code>1.0</code> |
|
| 259 |
-
| <code>// Union returns a new geometry representing all points in this geometry and the<br>// other.</code> | <code>func (g *Geometry) Union(other *Geometry) (*Geometry, error) {<br> return g.binaryTopo("Union", cGEOSUnion, other)<br>}</code> | <code>1.0</code> |
|
| 260 |
-
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 261 |
-
```json
|
| 262 |
-
{
|
| 263 |
-
"scale": 20.0,
|
| 264 |
-
"similarity_fct": "cos_sim"
|
| 265 |
-
}
|
| 266 |
-
```
|
| 267 |
-
|
| 268 |
-
### Training Hyperparameters
|
| 269 |
-
#### Non-Default Hyperparameters
|
| 270 |
-
|
| 271 |
-
- `per_device_train_batch_size`: 400
|
| 272 |
-
- `per_device_eval_batch_size`: 400
|
| 273 |
-
- `num_train_epochs`: 5
|
| 274 |
-
- `fp16`: True
|
| 275 |
-
- `multi_dataset_batch_sampler`: round_robin
|
| 276 |
-
|
| 277 |
-
#### All Hyperparameters
|
| 278 |
-
<details><summary>Click to expand</summary>
|
| 279 |
-
|
| 280 |
-
- `overwrite_output_dir`: False
|
| 281 |
-
- `do_predict`: False
|
| 282 |
-
- `eval_strategy`: no
|
| 283 |
-
- `prediction_loss_only`: True
|
| 284 |
-
- `per_device_train_batch_size`: 400
|
| 285 |
-
- `per_device_eval_batch_size`: 400
|
| 286 |
-
- `per_gpu_train_batch_size`: None
|
| 287 |
-
- `per_gpu_eval_batch_size`: None
|
| 288 |
-
- `gradient_accumulation_steps`: 1
|
| 289 |
-
- `eval_accumulation_steps`: None
|
| 290 |
-
- `torch_empty_cache_steps`: None
|
| 291 |
-
- `learning_rate`: 5e-05
|
| 292 |
-
- `weight_decay`: 0.0
|
| 293 |
-
- `adam_beta1`: 0.9
|
| 294 |
-
- `adam_beta2`: 0.999
|
| 295 |
-
- `adam_epsilon`: 1e-08
|
| 296 |
-
- `max_grad_norm`: 1
|
| 297 |
-
- `num_train_epochs`: 5
|
| 298 |
-
- `max_steps`: -1
|
| 299 |
-
- `lr_scheduler_type`: linear
|
| 300 |
-
- `lr_scheduler_kwargs`: {}
|
| 301 |
-
- `warmup_ratio`: 0.0
|
| 302 |
-
- `warmup_steps`: 0
|
| 303 |
-
- `log_level`: passive
|
| 304 |
-
- `log_level_replica`: warning
|
| 305 |
-
- `log_on_each_node`: True
|
| 306 |
-
- `logging_nan_inf_filter`: True
|
| 307 |
-
- `save_safetensors`: True
|
| 308 |
-
- `save_on_each_node`: False
|
| 309 |
-
- `save_only_model`: False
|
| 310 |
-
- `restore_callback_states_from_checkpoint`: False
|
| 311 |
-
- `no_cuda`: False
|
| 312 |
-
- `use_cpu`: False
|
| 313 |
-
- `use_mps_device`: False
|
| 314 |
-
- `seed`: 42
|
| 315 |
-
- `data_seed`: None
|
| 316 |
-
- `jit_mode_eval`: False
|
| 317 |
-
- `use_ipex`: False
|
| 318 |
-
- `bf16`: False
|
| 319 |
-
- `fp16`: True
|
| 320 |
-
- `fp16_opt_level`: O1
|
| 321 |
-
- `half_precision_backend`: auto
|
| 322 |
-
- `bf16_full_eval`: False
|
| 323 |
-
- `fp16_full_eval`: False
|
| 324 |
-
- `tf32`: None
|
| 325 |
-
- `local_rank`: 0
|
| 326 |
-
- `ddp_backend`: None
|
| 327 |
-
- `tpu_num_cores`: None
|
| 328 |
-
- `tpu_metrics_debug`: False
|
| 329 |
-
- `debug`: []
|
| 330 |
-
- `dataloader_drop_last`: False
|
| 331 |
-
- `dataloader_num_workers`: 0
|
| 332 |
-
- `dataloader_prefetch_factor`: None
|
| 333 |
-
- `past_index`: -1
|
| 334 |
-
- `disable_tqdm`: False
|
| 335 |
-
- `remove_unused_columns`: True
|
| 336 |
-
- `label_names`: None
|
| 337 |
-
- `load_best_model_at_end`: False
|
| 338 |
-
- `ignore_data_skip`: False
|
| 339 |
-
- `fsdp`: []
|
| 340 |
-
- `fsdp_min_num_params`: 0
|
| 341 |
-
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 342 |
-
- `tp_size`: 0
|
| 343 |
-
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 344 |
-
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 345 |
-
- `deepspeed`: None
|
| 346 |
-
- `label_smoothing_factor`: 0.0
|
| 347 |
-
- `optim`: adamw_torch
|
| 348 |
-
- `optim_args`: None
|
| 349 |
-
- `adafactor`: False
|
| 350 |
-
- `group_by_length`: False
|
| 351 |
-
- `length_column_name`: length
|
| 352 |
-
- `ddp_find_unused_parameters`: None
|
| 353 |
-
- `ddp_bucket_cap_mb`: None
|
| 354 |
-
- `ddp_broadcast_buffers`: False
|
| 355 |
-
- `dataloader_pin_memory`: True
|
| 356 |
-
- `dataloader_persistent_workers`: False
|
| 357 |
-
- `skip_memory_metrics`: True
|
| 358 |
-
- `use_legacy_prediction_loop`: False
|
| 359 |
-
- `push_to_hub`: False
|
| 360 |
-
- `resume_from_checkpoint`: None
|
| 361 |
-
- `hub_model_id`: None
|
| 362 |
-
- `hub_strategy`: every_save
|
| 363 |
-
- `hub_private_repo`: None
|
| 364 |
-
- `hub_always_push`: False
|
| 365 |
-
- `gradient_checkpointing`: False
|
| 366 |
-
- `gradient_checkpointing_kwargs`: None
|
| 367 |
-
- `include_inputs_for_metrics`: False
|
| 368 |
-
- `include_for_metrics`: []
|
| 369 |
-
- `eval_do_concat_batches`: True
|
| 370 |
-
- `fp16_backend`: auto
|
| 371 |
-
- `push_to_hub_model_id`: None
|
| 372 |
-
- `push_to_hub_organization`: None
|
| 373 |
-
- `mp_parameters`:
|
| 374 |
-
- `auto_find_batch_size`: False
|
| 375 |
-
- `full_determinism`: False
|
| 376 |
-
- `torchdynamo`: None
|
| 377 |
-
- `ray_scope`: last
|
| 378 |
-
- `ddp_timeout`: 1800
|
| 379 |
-
- `torch_compile`: False
|
| 380 |
-
- `torch_compile_backend`: None
|
| 381 |
-
- `torch_compile_mode`: None
|
| 382 |
-
- `include_tokens_per_second`: False
|
| 383 |
-
- `include_num_input_tokens_seen`: False
|
| 384 |
-
- `neftune_noise_alpha`: None
|
| 385 |
-
- `optim_target_modules`: None
|
| 386 |
-
- `batch_eval_metrics`: False
|
| 387 |
-
- `eval_on_start`: False
|
| 388 |
-
- `use_liger_kernel`: False
|
| 389 |
-
- `eval_use_gather_object`: False
|
| 390 |
-
- `average_tokens_across_devices`: False
|
| 391 |
-
- `prompts`: None
|
| 392 |
-
- `batch_sampler`: batch_sampler
|
| 393 |
-
- `multi_dataset_batch_sampler`: round_robin
|
| 394 |
-
|
| 395 |
-
</details>
|
| 396 |
-
|
| 397 |
-
### Training Logs
|
| 398 |
-
| Epoch | Step | Training Loss |
|
| 399 |
-
|:------:|:-----:|:-------------:|
|
| 400 |
-
| 0.1135 | 500 | 1.0064 |
|
| 401 |
-
| 0.2270 | 1000 | 0.1985 |
|
| 402 |
-
| 0.3405 | 1500 | 0.1802 |
|
| 403 |
-
| 0.4540 | 2000 | 0.1659 |
|
| 404 |
-
| 0.5675 | 2500 | 0.1583 |
|
| 405 |
-
| 0.6810 | 3000 | 0.153 |
|
| 406 |
-
| 0.7946 | 3500 | 0.1478 |
|
| 407 |
-
| 0.9081 | 4000 | 0.1425 |
|
| 408 |
-
| 1.0216 | 4500 | 0.132 |
|
| 409 |
-
| 1.1351 | 5000 | 0.097 |
|
| 410 |
-
| 1.2486 | 5500 | 0.1 |
|
| 411 |
-
| 1.3621 | 6000 | 0.0972 |
|
| 412 |
-
| 1.4756 | 6500 | 0.0958 |
|
| 413 |
-
| 1.5891 | 7000 | 0.0968 |
|
| 414 |
-
| 1.7026 | 7500 | 0.0945 |
|
| 415 |
-
| 1.8161 | 8000 | 0.0943 |
|
| 416 |
-
| 1.9296 | 8500 | 0.0938 |
|
| 417 |
-
| 2.0431 | 9000 | 0.0831 |
|
| 418 |
-
| 2.1566 | 9500 | 0.0634 |
|
| 419 |
-
| 2.2701 | 10000 | 0.0642 |
|
| 420 |
-
| 2.3837 | 10500 | 0.0639 |
|
| 421 |
-
| 2.4972 | 11000 | 0.0646 |
|
| 422 |
-
| 2.6107 | 11500 | 0.065 |
|
| 423 |
-
| 2.7242 | 12000 | 0.0637 |
|
| 424 |
-
| 2.8377 | 12500 | 0.062 |
|
| 425 |
-
| 2.9512 | 13000 | 0.0626 |
|
| 426 |
-
| 3.0647 | 13500 | 0.0522 |
|
| 427 |
-
| 3.1782 | 14000 | 0.0443 |
|
| 428 |
-
| 3.2917 | 14500 | 0.0435 |
|
| 429 |
-
| 3.4052 | 15000 | 0.0447 |
|
| 430 |
-
| 3.5187 | 15500 | 0.0441 |
|
| 431 |
-
| 3.6322 | 16000 | 0.045 |
|
| 432 |
-
| 3.7457 | 16500 | 0.0443 |
|
| 433 |
-
| 3.8593 | 17000 | 0.0441 |
|
| 434 |
-
| 3.9728 | 17500 | 0.0433 |
|
| 435 |
-
| 4.0863 | 18000 | 0.0368 |
|
| 436 |
-
| 4.1998 | 18500 | 0.0333 |
|
| 437 |
-
| 4.3133 | 19000 | 0.0332 |
|
| 438 |
-
| 4.4268 | 19500 | 0.0335 |
|
| 439 |
-
| 4.5403 | 20000 | 0.033 |
|
| 440 |
-
| 4.6538 | 20500 | 0.0334 |
|
| 441 |
-
| 4.7673 | 21000 | 0.0325 |
|
| 442 |
-
| 4.8808 | 21500 | 0.0342 |
|
| 443 |
-
| 4.9943 | 22000 | 0.0341 |
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
### Framework Versions
|
| 447 |
-
- Python: 3.11.12
|
| 448 |
-
- Sentence Transformers: 3.4.1
|
| 449 |
-
- Transformers: 4.51.3
|
| 450 |
-
- PyTorch: 2.6.0+cu124
|
| 451 |
-
- Accelerate: 1.5.2
|
| 452 |
-
- Datasets: 3.5.0
|
| 453 |
-
- Tokenizers: 0.21.1
|
| 454 |
-
|
| 455 |
-
## Citation
|
| 456 |
-
|
| 457 |
-
### BibTeX
|
| 458 |
-
|
| 459 |
-
#### Sentence Transformers
|
| 460 |
-
```bibtex
|
| 461 |
-
@inproceedings{reimers-2019-sentence-bert,
|
| 462 |
-
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 463 |
-
author = "Reimers, Nils and Gurevych, Iryna",
|
| 464 |
-
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 465 |
-
month = "11",
|
| 466 |
-
year = "2019",
|
| 467 |
-
publisher = "Association for Computational Linguistics",
|
| 468 |
-
url = "https://arxiv.org/abs/1908.10084",
|
| 469 |
-
}
|
| 470 |
-
```
|
| 471 |
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
@misc{henderson2017efficient,
|
| 475 |
-
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 476 |
-
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 477 |
-
year={2017},
|
| 478 |
-
eprint={1705.00652},
|
| 479 |
-
archivePrefix={arXiv},
|
| 480 |
-
primaryClass={cs.CL}
|
| 481 |
-
}
|
| 482 |
```
|
| 483 |
|
| 484 |
-
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
|
| 487 |
-
|
| 488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
|
| 490 |
-
|
| 491 |
-
## Model Card Authors
|
| 492 |
|
| 493 |
-
|
| 494 |
-
|
| 495 |
|
| 496 |
-
|
| 497 |
-
## Model Card Contact
|
| 498 |
|
| 499 |
-
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 500 |
-
-->
|
|
|
|
| 3 |
- sentence-transformers
|
| 4 |
- sentence-similarity
|
| 5 |
- feature-extraction
|
| 6 |
+
- code-search
|
| 7 |
+
- modernbert
|
| 8 |
+
- code
|
| 9 |
+
- python
|
| 10 |
+
- java
|
| 11 |
+
- javascript
|
| 12 |
+
- php
|
| 13 |
+
- ruby
|
| 14 |
+
- rust
|
| 15 |
+
- go
|
| 16 |
+
- mteb
|
| 17 |
base_model: Shuu12121/CodeModernBERT-Snake
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
pipeline_tag: sentence-similarity
|
| 19 |
library_name: sentence-transformers
|
| 20 |
+
license: apache-2.0
|
| 21 |
+
datasets:
|
| 22 |
+
- code-search-net/code_search_net
|
| 23 |
+
- Shuu12121/python-codesearch-filtered
|
| 24 |
+
- Shuu12121/java-codesearch-filtered
|
| 25 |
+
- Shuu12121/javascript-codesearch-filtered
|
| 26 |
+
- Shuu12121/rust-codesearch-filtered
|
| 27 |
+
- Shuu12121/ruby-codesearch-filtered
|
| 28 |
+
language:
|
| 29 |
+
- en
|
| 30 |
+
|
| 31 |
---
|
| 32 |
|
| 33 |
+
# Shuu12121/CodeSearch-ModernBERT-Snake-Plus 🐍
|
| 34 |
|
| 35 |
+
このモデルは、`Shuu12121/CodeModernBERT-Snake` をベースにした Sentence Transformer モデルであり、特に**多言語コード検索タスク**において高い性能を発揮するようファインチューニングされています。
|
| 36 |
|
| 37 |
+
> This is a Sentence Transformer model based on `Shuu12121/CodeModernBERT-Snake`, fine-tuned for high performance on multilingual code search tasks.
|
| 38 |
|
| 39 |
+
- **開発者 (Developer)**: [Shuu12121](https://huggingface.co/Shuu12121)
|
| 40 |
+
- **ベースモデル (Base Model)**: [Shuu12121/CodeModernBERT-Snake](https://huggingface.co/Shuu12121/CodeModernBERT-Snake)
|
| 41 |
+
- **ライセンス (License)**: Apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
---
|
| 44 |
|
| 45 |
+
## 📊 MTEB評価 / MTEB Evaluation
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
このモデルは、Massive Text Embedding Benchmark (MTEB) の**CodeSearchNet Retrieval**タスクにおいて、以下のスコアを記録しています。
|
| 48 |
|
| 49 |
+
| 指標 | スコア (標準版) | スコア (COIR版) |
|
| 50 |
+
|:-----|:----------------|:---------------|
|
| 51 |
+
| **main_score (nDCG@10)** | 0.87926 | 0.77199 |
|
| 52 |
+
| ndcg_at_1 | 0.78900 | 0.68372 |
|
| 53 |
+
| ndcg_at_3 | 0.86324 | 0.74734 |
|
| 54 |
+
| ndcg_at_5 | 0.87229 | 0.76061 |
|
| 55 |
+
| ndcg_at_10 | 0.87926 | 0.77199 |
|
| 56 |
+
| recall_at_10 | 0.95667 | 0.85808 |
|
| 57 |
+
| mrr_at_10 | 0.85375 | 0.74433 |
|
| 58 |
|
| 59 |
+
> ※ 標準版:通常のMTEB評価設定、COIR版:より厳しい類似度評価設定
|
| 60 |
+
> ※ 公式に提出していないためランキング情報は記載していません。(参考順位としては、標準版は10位前後、COIR版は8位前後のモデルとほぼ同等です。2025年4月現在)
|
| 61 |
|
| 62 |
+
---
|
| 63 |
|
| 64 |
+
## 🆚 Crow-Plusとの比較 / Comparison with Crow-Plus
|
| 65 |
|
| 66 |
+
Crow-Plus(`Shuu12121/CodeSearch-ModernBERT-Crow-Plus`)と比較すると、
|
| 67 |
+
**Snake-Plusは、パラメータ数を約半分に削減**しながら、非常に高い性能を維持しています。
|
| 68 |
+
|
| 69 |
+
| 指標 | Crow-Plus | Snake-Plus | コメント |
|
| 70 |
+
|:-----|:----------|:-----------|:---------|
|
| 71 |
+
| **main_score (nDCG@10, 標準版)** | 0.89296 | 0.87926 | SnakeはCrowに迫る水準を維持 |
|
| 72 |
+
| **main_score (nDCG@10, COIR版)** | 0.79884 | 0.77199 | 厳しい設定でもSnakeは健闘 |
|
| 73 |
+
| **パラメータ数 (推定)** | 約150M | 約77M | **SnakeはCrowの約半分** |
|
| 74 |
+
| Recall@10 (標準版) | 96.1% | 95.7% | ほぼ同等のリコール率 |
|
| 75 |
+
|
| 76 |
+
### 🔥 Snake-Plusの主な特徴
|
| 77 |
+
|
| 78 |
+
- **パラメータ数はCrow-Plusの約半分(50%)**
|
| 79 |
+
- **推論速度の高速化、メモリ消費量の低減**が可能
|
| 80 |
+
- **性能もほとんど維持**(nDCG@10ではCrow比約98%)
|
| 81 |
+
- 軽量なモデルが必要な環境(例:オンデバイス検索、低リソース推論)に特に適する
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## 🧠 客観的分析まとめ
|
| 86 |
+
|
| 87 |
+
Snake-Plusは、**高性能を維持したまま小型化を実現**したモデルです。
|
| 88 |
+
特に「推論コストを削減しつつ、実用レベルの検索性能を確保したい」ユースケースに非常に適しています。
|
| 89 |
+
|
| 90 |
+
一方で、**絶対的な最高性能を目指す場合**(特に難易度の高いドメインデータ)では、Crow-Plusの方が優位となる場面もあり得ます。
|
| 91 |
+
**用途とリソースに応じた使い分け**が推奨されます。
|
| 92 |
+
|
| 93 |
+
---
|
| 94 |
+
|
| 95 |
+
## 🔧 モデル詳細 / Model Details
|
| 96 |
+
|
| 97 |
+
- **ベースモデル**: Shuu12121/CodeModernBERT-Snake
|
| 98 |
+
- **アーキテクチャ**: ModernBERT (hidden\_size: 512, layers: 12, heads: 8)
|
| 99 |
+
- **最大入力長**: 1024トークン
|
| 100 |
+
- **ファインチューニング**: CodeSearchNetなどの自然言語–コードペアを用いた類似性学習
|
| 101 |
+
- **Pooling**: CLS Poolingを使用(SentenceTransformer互換)
|
| 102 |
+
|
| 103 |
+
---
|
| 104 |
+
|
| 105 |
+
## 🚀 使用方法 / How to Use
|
| 106 |
|
|
|
|
| 107 |
```python
|
| 108 |
from sentence_transformers import SentenceTransformer
|
| 109 |
|
| 110 |
+
model = SentenceTransformer("Shuu12121/CodeSearch-ModernBERT-Snake-Plus")
|
| 111 |
+
|
| 112 |
+
code_snippets = [
|
| 113 |
+
"def factorial(n): if n == 0: return 1 else: return n * factorial(n-1)",
|
| 114 |
+
"function binarySearch(arr, target) { let left = 0, right = arr.length - 1; while (left <= right) { const mid = Math.floor((left + right) / 2); if (arr[mid] === target) return mid; if (arr[mid] < target) left = mid + 1; else right = mid - 1; } return -1; }"
|
|
|
|
|
|
|
| 115 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
queries = [
|
| 118 |
+
"calculate the factorial of a number recursively",
|
| 119 |
+
"find an element in a sorted array using binary search"
|
| 120 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
code_embeddings = model.encode(code_snippets)
|
| 123 |
+
query_embeddings = model.encode(queries)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
```
|
| 125 |
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## 🧩 想定用途と制限 / Intended Use & Limitations
|
| 129 |
+
|
| 130 |
+
**用途例 / Intended Use:**
|
| 131 |
+
- 多言語コード検索 (Natural Language to Code, Code to Code)
|
| 132 |
+
- コードの類似性判定
|
| 133 |
+
- コード分類・クラスタリング
|
| 134 |
+
- コード推薦システム構築
|
| 135 |
|
| 136 |
+
**対象言語 / Target Languages:**
|
| 137 |
+
- Python, Java, JavaScript, PHP, Ruby, Go, Rust
|
| 138 |
+
|
| 139 |
+
**制限 / Limitations:**
|
| 140 |
+
- 主に**関数レベルのコード**に最適化。非常に長いファイルや構文エラーを含むコードには弱い傾向あり。
|
| 141 |
+
- ドメイン特化タスクには追加ファインチューニングが有効な場合がある。
|
| 142 |
+
- **生成タスク**(例:コード補完・生成)には向いていない(エンコーダ専用)。
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
|
| 146 |
+
## 📩 連絡先 / Contact
|
|
|
|
| 147 |
|
| 148 |
+
ご質問・ご提案はこちらへどうぞ。
|
| 149 |
+
For questions or suggestions, please contact:
|
| 150 |
|
| 151 |
+
**📧 [email protected]**
|
|
|
|
| 152 |
|
|
|
|
|
|