#!/bin/bash
#SBATCH --job-name=dynamic-llava
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=8
#SBATCH --time=72:00:00
#SBATCH --output=logs/dynamic_llava_%j.out
#SBATCH --error=logs/dynamic_llava_%j.err

# Environment setup: custom GPU ordering, 8 workers per node for xtuner
export CUDA_VISIBLE_DEVICES=3,4,5,1,0,6,7,2
export NPROC_PER_NODE=8

# Path settings
CONFIG_DIR="configs/slidechat/experiments_dynamic_llava"
OUTPUT_BASE="/data/qingq/PathVLM/baselines/github/SlideChat/models/outputs"
DS_CONFIG="configs/deepspeed/deepspeed_zero3_offload.json"

# Tumor types
TUMORS=("BRCA" "LGG" "COAD" "GBM")

# Create log dir if missing
mkdir -p logs

# Train each tumor type sequentially, all 8 GPUs per run.
# ${TUMOR,,} lowercases the tumor name (bash 4+) to match the config filenames.
for TUMOR in "${TUMORS[@]}"; do
    CONFIG_FILE="${CONFIG_DIR}/stage_2_dynamic_llava_${TUMOR,,}.py"
    WORK_DIR="${OUTPUT_BASE}/stage2_dynamic_llava_qlora_${TUMOR,,}"

    echo "Launching training for tumor: $TUMOR"
    echo "  Config:   $CONFIG_FILE"
    echo "  Work dir: $WORK_DIR"

    xtuner train \
        "$CONFIG_FILE" \
        --deepspeed "$DS_CONFIG" \
        --work-dir "$WORK_DIR" \
        --local_rank 0
done
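
# ---------------------------------------------------------------------------
# Optional pre-flight check -- a minimal sketch, not part of the original
# submission logic. It fails fast if any per-tumor config file is missing,
# so no GPU allocation is wasted on a mistyped path. It uses only CONFIG_DIR
# and TUMORS as defined above; the filename pattern is assumed to match the
# training loop. To use it, move this function above the loop and call
# `preflight_check || exit 1` right after the TUMORS array is declared.
# ---------------------------------------------------------------------------
preflight_check() {
    local tumor cfg
    for tumor in "${TUMORS[@]}"; do
        cfg="${CONFIG_DIR}/stage_2_dynamic_llava_${tumor,,}.py"
        if [[ ! -f "$cfg" ]]; then
            echo "ERROR: missing config file: $cfg" >&2
            return 1
        fi
    done
    echo "Pre-flight check passed: all ${#TUMORS[@]} config files found."
}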