Change attention implementation from flash_attention_2 to eager

Files changed (9) hide show

.idea/.gitignore ADDED Viewed

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/AVL.iml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="GOOGLE" />
+    <option name="myDocStringFormat" value="Google" />
+  </component>
+</module>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/modules.xml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/AVL.iml" filepath="$PROJECT_DIR$/.idea/AVL.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

handler.py CHANGED Viewed

@@ -2,6 +2,7 @@ from typing import Dict, Any
 import base64
 import tempfile
 import os
 import sys

 import base64
 import tempfile
 import os
+os.environ["TRANSFORMERS_NO_FLASH_ATTN_2"] = "1"
 import sys

videollama2/model/__init__.py CHANGED Viewed

@@ -71,7 +71,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
         kwargs['torch_dtype'] = torch.float16
     if use_flash_attn:
-        kwargs['attn_implementation'] = 'flash_attention_2'
     config = AutoConfig.from_pretrained(model_path)

         kwargs['torch_dtype'] = torch.float16
     if use_flash_attn:
+        kwargs['attn_implementation'] = 'eager'
     config = AutoConfig.from_pretrained(model_path)

videollama2/model/encoder.py CHANGED Viewed

@@ -21,7 +21,7 @@ class CLIPVisionTower(nn.Module):
         self.image_processor = CLIPImageProcessor.from_pretrained(self.vision_tower_name)
         config = CLIPVisionConfig.from_pretrained(self.vision_tower_name)
-        config._attn_implementation = "flash_attention_2"
         if not load_pretrained:
             self.vision_tower = CLIPVisionModel(config=config)
@@ -93,7 +93,7 @@ class SiglipVisionTower(nn.Module):
         self.image_processor = SiglipImageProcessor.from_pretrained(self.vision_tower_name)
         config = SiglipVisionConfig.from_pretrained(self.vision_tower_name)
-        config._attn_implementation = 'flash_attention_2'
         if not load_pretrained:
             self.vision_tower = SiglipVisionModel(config=config)

         self.image_processor = CLIPImageProcessor.from_pretrained(self.vision_tower_name)
         config = CLIPVisionConfig.from_pretrained(self.vision_tower_name)
+        config._attn_implementation = "eager"
         if not load_pretrained:
             self.vision_tower = CLIPVisionModel(config=config)
         self.image_processor = SiglipImageProcessor.from_pretrained(self.vision_tower_name)
         config = SiglipVisionConfig.from_pretrained(self.vision_tower_name)
+        config._attn_implementation = 'eager'
         if not load_pretrained:
             self.vision_tower = SiglipVisionModel(config=config)

videollama2/train.py CHANGED Viewed

@@ -571,4 +571,4 @@ def train(attn_implementation=None):
 if __name__ == "__main__":
-    train("flash_attention_2")