Refactor model.py and requirements.txt for better code organization and to remove the flash-attn dependency
Browse files- kitt/core/model.py +0 -1
- requirements.txt +0 -1
kitt/core/model.py
CHANGED
|
@@ -347,7 +347,6 @@ def run_inference_ollama(prompt):
|
|
| 347 |
|
| 348 |
def load_gpu_model():
|
| 349 |
import bitsandbytes
|
| 350 |
-
import flash_attn
|
| 351 |
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
|
| 352 |
|
| 353 |
tokenizer = AutoTokenizer.from_pretrained(
|
|
|
|
| 347 |
|
| 348 |
def load_gpu_model():
|
| 349 |
import bitsandbytes
|
|
|
|
| 350 |
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
|
| 351 |
|
| 352 |
tokenizer = AutoTokenizer.from_pretrained(
|
requirements.txt
CHANGED
|
@@ -6,7 +6,6 @@ wurlitzer
|
|
| 6 |
accelerate
|
| 7 |
bitsandbytes
|
| 8 |
optimum
|
| 9 |
-
flash-attn
|
| 10 |
# auto-gptq
|
| 11 |
gradio
|
| 12 |
TTS
|
|
|
|
| 6 |
accelerate
|
| 7 |
bitsandbytes
|
| 8 |
optimum
|
|
|
|
| 9 |
# auto-gptq
|
| 10 |
gradio
|
| 11 |
TTS
|