Flash attn hotfix (#951)
* use previous arg
* use eager so the legacy attention implementation can still be patched
src/axolotl/utils/models.py (changed)
```diff
@@ -324,6 +324,10 @@ def load_model(
         model_config._attn_implementation = (  # pylint: disable=protected-access
             "flash_attention_2"
         )
+    else:
+        model_config._attn_implementation = (  # pylint: disable=protected-access
+            "eager"
+        )
 
     try:
         if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
```
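For illustration, here is a minimal sketch of the selection this hunk performs, written against the public transformers config API. The helper name `pick_attn_implementation`, the `use_flash_attention` flag, and the example model ID are placeholders for this sketch, not part of axolotl:

```python
# Minimal sketch (assumptions: a transformers-style PretrainedConfig and a
# boolean flash-attention flag; names below are illustrative, not axolotl's API).
from transformers import AutoConfig, PretrainedConfig


def pick_attn_implementation(base_model: str, use_flash_attention: bool) -> PretrainedConfig:
    model_config = AutoConfig.from_pretrained(base_model)
    if use_flash_attention:
        # Let transformers dispatch to its FlashAttention-2 kernels.
        model_config._attn_implementation = (  # pylint: disable=protected-access
            "flash_attention_2"
        )
    else:
        # "eager" keeps the original (legacy) attention forward pass, which is
        # the code path that attention monkey-patches can still hook into.
        model_config._attn_implementation = (  # pylint: disable=protected-access
            "eager"
        )
    return model_config


# Example: request eager attention so downstream patches still apply.
# config = pick_attn_implementation("meta-llama/Llama-2-7b-hf", use_flash_attention=False)
```

The reason for forcing "eager" in the else branch, as the commit message states, is that it keeps the legacy attention modules in place so they can be patched; leaving the setting untouched could let transformers pick a different implementation that bypasses those modules.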