#!/bin/bash
#SBATCH --job-name=dynamic-llava
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=8
#SBATCH --time=72:00:00
#SBATCH --output=logs/dynamic_llava_%j.out
#SBATCH --error=logs/dynamic_llava_%j.err
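# Usage sketch (an assumption, not stated in the script itself): submit from
# the repo root so the relative config paths below resolve, e.g.
#   sbatch scripts/dynamic_training_script.sh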
# Environment setup
export CUDA_VISIBLE_DEVICES=3,4,5,1,0,6,7,2
export NPROC_PER_NODE=8
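# Note: the non-sequential CUDA_VISIBLE_DEVICES order above remaps the eight
# physical GPUs onto logical ranks 0-7; xtuner reads NPROC_PER_NODE and spawns
# one worker per GPU itself, so no explicit srun/torchrun wrapper is needed.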
# Path settings
CONFIG_DIR="configs/slidechat/experiments_dynamic_llava"
OUTPUT_BASE="/data/qingq/PathVLM/baselines/github/SlideChat/models/outputs"
DS_CONFIG="configs/deepspeed/deepspeed_zero3_offload.json"
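# DS_CONFIG points at a ZeRO Stage-3 CPU-offload setup (going by the file
# name); OUTPUT_BASE is machine-specific and will need adjusting elsewhere.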
# Tumor types (TCGA cohort abbreviations)
TUMORS=("BRCA" "LGG" "COAD" "GBM")
# Create log dir if missing (the #SBATCH output paths above also need logs/
# to exist when the job starts, so create it once before submitting as well)
mkdir -p logs
# Loop over tumor types
for TUMOR in "${TUMORS[@]}"; do
    CONFIG_FILE="${CONFIG_DIR}/stage_2_dynamic_llava_${TUMOR,,}.py"
    WORK_DIR="${OUTPUT_BASE}/stage2_dynamic_llava_qlora_${TUMOR,,}"
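    # Defensive check (an added sketch, not part of the original script):
    # skip tumors whose config file is missing instead of failing mid-loop.
    if [[ ! -f "$CONFIG_FILE" ]]; then
        echo "  Config not found, skipping: $CONFIG_FILE" >&2
        continue
    fi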
echo "Launching training for tumor: $TUMOR"
echo " Config: $CONFIG_FILE"
echo " Work dir: $WORK_DIR"
    xtuner train \
        "$CONFIG_FILE" \
        --deepspeed "$DS_CONFIG" \
        --work-dir "$WORK_DIR" \
        --local_rank 0  # kept from the original; 0 is also xtuner's default
done
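# The loop runs the four trainings back to back, each using all eight GPUs.
# A hypothetical per-tumor log capture (not in the original) could wrap the
# launch like so:
#   xtuner train "$CONFIG_FILE" --deepspeed "$DS_CONFIG" --work-dir "$WORK_DIR" \
#       2>&1 | tee "logs/train_${TUMOR,,}.log"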