Commit
·
142339c
1
Parent(s):
723daaf
change flash_attention_2 to eager
Browse files
- .idea/.gitignore +8 -0
- .idea/AVL.iml +12 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- handler.py +1 -0
- videollama2/model/__init__.py +1 -1
- videollama2/model/encoder.py +2 -2
- videollama2/train.py +1 -1
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 默认忽略的文件
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# 基于编辑器的 HTTP 客户端请求
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
.idea/AVL.iml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="inheritedJdk" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
<component name="PyDocumentationSettings">
|
| 9 |
+
<option name="format" value="GOOGLE" />
|
| 10 |
+
<option name="myDocStringFormat" value="Google" />
|
| 11 |
+
</component>
|
| 12 |
+
</module>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/AVL.iml" filepath="$PROJECT_DIR$/.idea/AVL.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
handler.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Dict, Any
|
|
| 2 |
import base64
|
| 3 |
import tempfile
|
| 4 |
import os
|
|
|
|
| 5 |
import sys
|
| 6 |
|
| 7 |
|
|
|
|
| 2 |
import base64
|
| 3 |
import tempfile
|
| 4 |
import os
|
| 5 |
+
os.environ["TRANSFORMERS_NO_FLASH_ATTN_2"] = "1"
|
| 6 |
import sys
|
| 7 |
|
| 8 |
|
videollama2/model/__init__.py
CHANGED
|
@@ -71,7 +71,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
|
|
| 71 |
kwargs['torch_dtype'] = torch.float16
|
| 72 |
|
| 73 |
if use_flash_attn:
|
| 74 |
-
kwargs['attn_implementation'] = 'flash_attention_2'
|
| 75 |
|
| 76 |
config = AutoConfig.from_pretrained(model_path)
|
| 77 |
|
|
|
|
| 71 |
kwargs['torch_dtype'] = torch.float16
|
| 72 |
|
| 73 |
if use_flash_attn:
|
| 74 |
+
kwargs['attn_implementation'] = 'eager'
|
| 75 |
|
| 76 |
config = AutoConfig.from_pretrained(model_path)
|
| 77 |
|
videollama2/model/encoder.py
CHANGED
|
@@ -21,7 +21,7 @@ class CLIPVisionTower(nn.Module):
|
|
| 21 |
self.image_processor = CLIPImageProcessor.from_pretrained(self.vision_tower_name)
|
| 22 |
|
| 23 |
config = CLIPVisionConfig.from_pretrained(self.vision_tower_name)
|
| 24 |
-
config._attn_implementation = "flash_attention_2"
|
| 25 |
|
| 26 |
if not load_pretrained:
|
| 27 |
self.vision_tower = CLIPVisionModel(config=config)
|
|
@@ -93,7 +93,7 @@ class SiglipVisionTower(nn.Module):
|
|
| 93 |
self.image_processor = SiglipImageProcessor.from_pretrained(self.vision_tower_name)
|
| 94 |
|
| 95 |
config = SiglipVisionConfig.from_pretrained(self.vision_tower_name)
|
| 96 |
-
config._attn_implementation = 'flash_attention_2'
|
| 97 |
|
| 98 |
if not load_pretrained:
|
| 99 |
self.vision_tower = SiglipVisionModel(config=config)
|
|
|
|
| 21 |
self.image_processor = CLIPImageProcessor.from_pretrained(self.vision_tower_name)
|
| 22 |
|
| 23 |
config = CLIPVisionConfig.from_pretrained(self.vision_tower_name)
|
| 24 |
+
config._attn_implementation = "eager"
|
| 25 |
|
| 26 |
if not load_pretrained:
|
| 27 |
self.vision_tower = CLIPVisionModel(config=config)
|
|
|
|
| 93 |
self.image_processor = SiglipImageProcessor.from_pretrained(self.vision_tower_name)
|
| 94 |
|
| 95 |
config = SiglipVisionConfig.from_pretrained(self.vision_tower_name)
|
| 96 |
+
config._attn_implementation = 'eager'
|
| 97 |
|
| 98 |
if not load_pretrained:
|
| 99 |
self.vision_tower = SiglipVisionModel(config=config)
|
videollama2/train.py
CHANGED
|
@@ -571,4 +571,4 @@ def train(attn_implementation=None):
|
|
| 571 |
|
| 572 |
|
| 573 |
if __name__ == "__main__":
|
| 574 |
-
train("flash_attention_2")
|
|
|
|
| 571 |
|
| 572 |
|
| 573 |
if __name__ == "__main__":
|
| 574 |
+
train("eager")
|