Upload LlavaOnevisionForConditionalGeneration
Browse files
- README.md +3 -3
- config.json +3 -7
- model.safetensors +1 -1
README.md
CHANGED
|
@@ -2,15 +2,15 @@
|
|
| 2 |
language:
|
| 3 |
- en
|
| 4 |
- zh
|
| 5 |
-
pipeline_tag: image-text-to-text
|
| 6 |
-
inference: false
|
| 7 |
-
arxiv: 2408.03326
|
| 8 |
license: apache-2.0
|
| 9 |
tags:
|
| 10 |
- vision
|
| 11 |
- image-text-to-text
|
| 12 |
datasets:
|
| 13 |
- lmms-lab/LLaVA-OneVision-Data
|
|
|
|
|
|
|
|
|
|
| 14 |
---
|
| 15 |
# LLaVA-Onevision Model Card
|
| 16 |
|
|
|
|
| 2 |
language:
|
| 3 |
- en
|
| 4 |
- zh
|
|
|
|
|
|
|
|
|
|
| 5 |
license: apache-2.0
|
| 6 |
tags:
|
| 7 |
- vision
|
| 8 |
- image-text-to-text
|
| 9 |
datasets:
|
| 10 |
- lmms-lab/LLaVA-OneVision-Data
|
| 11 |
+
pipeline_tag: image-text-to-text
|
| 12 |
+
inference: false
|
| 13 |
+
arxiv: 2408.03326
|
| 14 |
---
|
| 15 |
# LLaVA-Onevision Model Card
|
| 16 |
|
config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"_name_or_path": "/raid/raushan/ov-500",
|
| 3 |
"architectures": [
|
| 4 |
-
"
|
| 5 |
],
|
| 6 |
"ignore_index": -100,
|
| 7 |
"image_grid_pinpoints": [
|
|
@@ -151,7 +151,7 @@
|
|
| 151 |
]
|
| 152 |
],
|
| 153 |
"image_token_index": 151646,
|
| 154 |
-
"model_type": "
|
| 155 |
"projector_hidden_act": "gelu",
|
| 156 |
"text_config": {
|
| 157 |
"_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
|
|
@@ -162,30 +162,26 @@
|
|
| 162 |
"eos_token_id": 151645,
|
| 163 |
"hidden_size": 896,
|
| 164 |
"intermediate_size": 4864,
|
| 165 |
-
"max_position_embeddings": 32768,
|
| 166 |
"max_window_layers": 24,
|
| 167 |
"model_type": "qwen2",
|
| 168 |
"num_attention_heads": 14,
|
| 169 |
"num_hidden_layers": 24,
|
| 170 |
"num_key_value_heads": 2,
|
| 171 |
"rope_theta": 1000000.0,
|
| 172 |
-
"sliding_window": null,
|
| 173 |
"tie_word_embeddings": true,
|
| 174 |
"torch_dtype": "bfloat16",
|
| 175 |
-
"use_sliding_window": false,
|
| 176 |
"vocab_size": 152000
|
| 177 |
},
|
| 178 |
"tie_word_embeddings": false,
|
| 179 |
"torch_dtype": "float16",
|
| 180 |
"transformers_version": "4.45.0.dev0",
|
| 181 |
"use_image_newline_parameter": true,
|
|
|
|
| 182 |
"vision_aspect_ratio": "anyres_max_9",
|
| 183 |
"vision_config": {
|
| 184 |
-
"hidden_act": "gelu_pytorch_tanh",
|
| 185 |
"hidden_size": 1152,
|
| 186 |
"image_size": 384,
|
| 187 |
"intermediate_size": 4304,
|
| 188 |
-
"layer_norm_eps": 1e-06,
|
| 189 |
"model_type": "siglip_vision_model",
|
| 190 |
"num_attention_heads": 16,
|
| 191 |
"num_hidden_layers": 26,
|
|
|
|
| 1 |
{
|
| 2 |
"_name_or_path": "/raid/raushan/ov-500",
|
| 3 |
"architectures": [
|
| 4 |
+
"LlavaOnevisionForConditionalGeneration"
|
| 5 |
],
|
| 6 |
"ignore_index": -100,
|
| 7 |
"image_grid_pinpoints": [
|
|
|
|
| 151 |
]
|
| 152 |
],
|
| 153 |
"image_token_index": 151646,
|
| 154 |
+
"model_type": "llava_onevision",
|
| 155 |
"projector_hidden_act": "gelu",
|
| 156 |
"text_config": {
|
| 157 |
"_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
|
|
|
|
| 162 |
"eos_token_id": 151645,
|
| 163 |
"hidden_size": 896,
|
| 164 |
"intermediate_size": 4864,
|
|
|
|
| 165 |
"max_window_layers": 24,
|
| 166 |
"model_type": "qwen2",
|
| 167 |
"num_attention_heads": 14,
|
| 168 |
"num_hidden_layers": 24,
|
| 169 |
"num_key_value_heads": 2,
|
| 170 |
"rope_theta": 1000000.0,
|
|
|
|
| 171 |
"tie_word_embeddings": true,
|
| 172 |
"torch_dtype": "bfloat16",
|
|
|
|
| 173 |
"vocab_size": 152000
|
| 174 |
},
|
| 175 |
"tie_word_embeddings": false,
|
| 176 |
"torch_dtype": "float16",
|
| 177 |
"transformers_version": "4.45.0.dev0",
|
| 178 |
"use_image_newline_parameter": true,
|
| 179 |
+
"video_token_index": 151647,
|
| 180 |
"vision_aspect_ratio": "anyres_max_9",
|
| 181 |
"vision_config": {
|
|
|
|
| 182 |
"hidden_size": 1152,
|
| 183 |
"image_size": 384,
|
| 184 |
"intermediate_size": 4304,
|
|
|
|
| 185 |
"model_type": "siglip_vision_model",
|
| 186 |
"num_attention_heads": 16,
|
| 187 |
"num_hidden_layers": 26,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1787445680
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07b3362c3412de79baf2379e44e5b0b2a8f4b965ebebd11d7b5b3eb4450fe96e
|
| 3 |
size 1787445680
|