Upload folder using huggingface_hub
Browse files
- README.md +10 -0
- modeling_internvl_chat.py +2 -1
README.md
CHANGED
|
@@ -89,6 +89,7 @@ model = AutoModel.from_pretrained(
|
|
| 89 |
path,
|
| 90 |
torch_dtype=torch.bfloat16,
|
| 91 |
low_cpu_mem_usage=True,
|
|
|
|
| 92 |
trust_remote_code=True).eval().cuda()
|
| 93 |
```
|
| 94 |
|
|
@@ -103,6 +104,7 @@ model = AutoModel.from_pretrained(
|
|
| 103 |
torch_dtype=torch.bfloat16,
|
| 104 |
load_in_8bit=True,
|
| 105 |
low_cpu_mem_usage=True,
|
|
|
|
| 106 |
trust_remote_code=True).eval()
|
| 107 |
```
|
| 108 |
|
|
@@ -149,6 +151,7 @@ model = AutoModel.from_pretrained(
|
|
| 149 |
path,
|
| 150 |
torch_dtype=torch.bfloat16,
|
| 151 |
low_cpu_mem_usage=True,
|
|
|
|
| 152 |
trust_remote_code=True,
|
| 153 |
device_map=device_map).eval()
|
| 154 |
```
|
|
@@ -166,6 +169,7 @@ model = AutoModel.from_pretrained(
|
|
| 166 |
path,
|
| 167 |
torch_dtype=torch.bfloat16,
|
| 168 |
low_cpu_mem_usage=True,
|
|
|
|
| 169 |
trust_remote_code=True).eval().cuda()
|
| 170 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 171 |
|
|
@@ -193,6 +197,7 @@ model = AutoModel.from_pretrained(
|
|
| 193 |
path,
|
| 194 |
torch_dtype=torch.bfloat16,
|
| 195 |
low_cpu_mem_usage=True,
|
|
|
|
| 196 |
trust_remote_code=True).eval().cuda()
|
| 197 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 198 |
|
|
@@ -219,6 +224,7 @@ model = AutoModel.from_pretrained(
|
|
| 219 |
path,
|
| 220 |
torch_dtype=torch.bfloat16,
|
| 221 |
low_cpu_mem_usage=True,
|
|
|
|
| 222 |
trust_remote_code=True).eval().cuda()
|
| 223 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 224 |
|
|
@@ -252,6 +258,7 @@ model = AutoModel.from_pretrained(
|
|
| 252 |
path,
|
| 253 |
torch_dtype=torch.bfloat16,
|
| 254 |
low_cpu_mem_usage=True,
|
|
|
|
| 255 |
trust_remote_code=True).eval().cuda()
|
| 256 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 257 |
|
|
@@ -290,6 +297,7 @@ model = AutoModel.from_pretrained(
|
|
| 290 |
path,
|
| 291 |
torch_dtype=torch.bfloat16,
|
| 292 |
low_cpu_mem_usage=True,
|
|
|
|
| 293 |
trust_remote_code=True).eval().cuda()
|
| 294 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 295 |
|
|
@@ -327,6 +335,7 @@ model = AutoModel.from_pretrained(
|
|
| 327 |
path,
|
| 328 |
torch_dtype=torch.bfloat16,
|
| 329 |
low_cpu_mem_usage=True,
|
|
|
|
| 330 |
trust_remote_code=True).eval().cuda()
|
| 331 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 332 |
|
|
@@ -397,6 +406,7 @@ model = AutoModel.from_pretrained(
|
|
| 397 |
path,
|
| 398 |
torch_dtype=torch.bfloat16,
|
| 399 |
low_cpu_mem_usage=True,
|
|
|
|
| 400 |
trust_remote_code=True).eval().cuda()
|
| 401 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 402 |
|
|
|
|
| 89 |
path,
|
| 90 |
torch_dtype=torch.bfloat16,
|
| 91 |
low_cpu_mem_usage=True,
|
| 92 |
+ use_flash_attn=True,
|
| 93 |
trust_remote_code=True).eval().cuda()
|
| 94 |
```
|
| 95 |
|
|
|
|
| 104 |
torch_dtype=torch.bfloat16,
|
| 105 |
load_in_8bit=True,
|
| 106 |
low_cpu_mem_usage=True,
|
| 107 |
+ use_flash_attn=True,
|
| 108 |
trust_remote_code=True).eval()
|
| 109 |
```
|
| 110 |
|
|
|
|
| 151 |
path,
|
| 152 |
torch_dtype=torch.bfloat16,
|
| 153 |
low_cpu_mem_usage=True,
|
| 154 |
+ use_flash_attn=True,
|
| 155 |
trust_remote_code=True,
|
| 156 |
device_map=device_map).eval()
|
| 157 |
```
|
|
|
|
| 169 |
path,
|
| 170 |
torch_dtype=torch.bfloat16,
|
| 171 |
low_cpu_mem_usage=True,
|
| 172 |
+ use_flash_attn=True,
|
| 173 |
trust_remote_code=True).eval().cuda()
|
| 174 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 175 |
|
|
|
|
| 197 |
path,
|
| 198 |
torch_dtype=torch.bfloat16,
|
| 199 |
low_cpu_mem_usage=True,
|
| 200 |
+ use_flash_attn=True,
|
| 201 |
trust_remote_code=True).eval().cuda()
|
| 202 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 203 |
|
|
|
|
| 224 |
path,
|
| 225 |
torch_dtype=torch.bfloat16,
|
| 226 |
low_cpu_mem_usage=True,
|
| 227 |
+ use_flash_attn=True,
|
| 228 |
trust_remote_code=True).eval().cuda()
|
| 229 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 230 |
|
|
|
|
| 258 |
path,
|
| 259 |
torch_dtype=torch.bfloat16,
|
| 260 |
low_cpu_mem_usage=True,
|
| 261 |
+ use_flash_attn=True,
|
| 262 |
trust_remote_code=True).eval().cuda()
|
| 263 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 264 |
|
|
|
|
| 297 |
path,
|
| 298 |
torch_dtype=torch.bfloat16,
|
| 299 |
low_cpu_mem_usage=True,
|
| 300 |
+ use_flash_attn=True,
|
| 301 |
trust_remote_code=True).eval().cuda()
|
| 302 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 303 |
|
|
|
|
| 335 |
path,
|
| 336 |
torch_dtype=torch.bfloat16,
|
| 337 |
low_cpu_mem_usage=True,
|
| 338 |
+ use_flash_attn=True,
|
| 339 |
trust_remote_code=True).eval().cuda()
|
| 340 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 341 |
|
|
|
|
| 406 |
path,
|
| 407 |
torch_dtype=torch.bfloat16,
|
| 408 |
low_cpu_mem_usage=True,
|
| 409 |
+ use_flash_attn=True,
|
| 410 |
trust_remote_code=True).eval().cuda()
|
| 411 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
| 412 |
|
modeling_internvl_chat.py
CHANGED
|
@@ -17,7 +17,7 @@ from transformers.utils import ModelOutput, logging
|
|
| 17 |
|
| 18 |
from .configuration_internvl_chat import InternVLChatConfig
|
| 19 |
from .conversation import get_conv_template
|
| 20 |
- from .modeling_intern_vit import InternVisionModel
|
| 21 |
|
| 22 |
logger = logging.get_logger(__name__)
|
| 23 |
|
|
@@ -48,6 +48,7 @@ class InternVLChatModel(PreTrainedModel):
|
|
| 48 |
self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
|
| 49 |
self.downsample_ratio = config.downsample_ratio
|
| 50 |
self.ps_version = config.ps_version
|
|
|
|
| 51 |
config.vision_config.use_flash_attn = True if use_flash_attn else False
|
| 52 |
config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
|
| 53 |
|
|
|
|
| 17 |
|
| 18 |
from .configuration_internvl_chat import InternVLChatConfig
|
| 19 |
from .conversation import get_conv_template
|
| 20 |
+ from .modeling_intern_vit import InternVisionModel, has_flash_attn
|
| 21 |
|
| 22 |
logger = logging.get_logger(__name__)
|
| 23 |
|
|
|
|
| 48 |
self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
|
| 49 |
self.downsample_ratio = config.downsample_ratio
|
| 50 |
self.ps_version = config.ps_version
|
| 51 |
+ use_flash_attn = use_flash_attn if has_flash_attn else False
|
| 52 |
config.vision_config.use_flash_attn = True if use_flash_attn else False
|
| 53 |
config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
|
| 54 |
|