#!/bin/bash
#SBATCH --job-name=dynamic-llava
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=8
#SBATCH --time=72:00:00
#SBATCH --output=logs/dynamic_llava_%j.out
#SBATCH --error=logs/dynamic_llava_%j.err
# Environment setup.
# CUDA_VISIBLE_DEVICES remaps the physical GPU order; with all 8 GPUs in use,
# the permutation only changes which physical device each rank sees.
export CUDA_VISIBLE_DEVICES=3,4,5,1,0,6,7,2
# xtuner reads NPROC_PER_NODE and dispatches to torchrun with that many
# workers, so this batch script itself only needs to run once.
export NPROC_PER_NODE=8
# Path settings
CONFIG_DIR="configs/slidechat/experiments_dynamic_llava"
OUTPUT_BASE="/data/qingq/PathVLM/baselines/github/SlideChat/models/outputs"
DS_CONFIG="configs/deepspeed/deepspeed_zero3_offload.json"
# Tumor types (one training run per type)
TUMORS=("BRCA" "LGG" "COAD" "GBM")
# Create the log dir if missing. Note: SLURM opens the --output/--error files
# before this script runs, so logs/ must already exist at submission time;
# this mkdir only covers logging done at runtime.
mkdir -p logs
# Loop over tumor types, training each one sequentially
for TUMOR in "${TUMORS[@]}"; do
    # ${TUMOR,,} lowercases the name (requires bash 4+)
    CONFIG_FILE="${CONFIG_DIR}/stage_2_dynamic_llava_${TUMOR,,}.py"
    WORK_DIR="${OUTPUT_BASE}/stage2_dynamic_llava_qlora_${TUMOR,,}"
    echo "Launching training for tumor: $TUMOR"
    echo "  Config: $CONFIG_FILE"
    echo "  Work dir: $WORK_DIR"
    # NPROC_PER_NODE (exported above) makes xtuner spawn one worker per GPU;
    # --local_rank should not be hardcoded, since the launcher assigns it
    # per worker.
    xtuner train \
        "$CONFIG_FILE" \
        --deepspeed "$DS_CONFIG" \
        --work-dir "$WORK_DIR"
done
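# Example submission (the script filename here is hypothetical):
#   sbatch train_dynamic_llava.slurm
# Progress can then be followed via the SLURM logs, e.g.:
#   tail -f logs/dynamic_llava_<jobid>.out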