#!/usr/bin/env bash
#
# Runs `xtuner train` once per tumor type, one job after another in the
# foreground.
#
# Tumor types are discovered from the *.json files directly under
# DATASET_DIR: the file name without ".json" is lowercased and used to pick
# the matching config in SCRIPT_DIR and to name the work dir under
# OUTPUT_BASE. Tumor types without a config file are skipped with a warning.
#
# SCRIPT_DIR and DEEPSPEED_CONFIG are relative paths, so the script must be
# started from the directory that contains `configs/`.
#
# Exit status: non-zero if no dataset JSON file is found, or (because of
# `set -e`) as soon as one `xtuner train` invocation fails.

set -euo pipefail

# ------------------------------------------------------------------
# CONFIGURATION
# ------------------------------------------------------------------

# GPUs and DDP settings
# NOTE(review): NPROC_PER_NODE is presumably read by xtuner to decide how
# many worker processes to launch -- confirm; it should match the number of
# devices listed in CUDA_VISIBLE_DEVICES.
export CUDA_VISIBLE_DEVICES="4,5,6,7"
export NPROC_PER_NODE=4

# Paths
# NOTE(review): DATASET_DIR ends in "stage2_vqa_tumor_", which looks like a
# file-name prefix rather than a directory. The loop below globs
# "${DATASET_DIR}"/*.json, i.e. it treats this value as a directory --
# confirm that this directory really exists and holds files like BLCA.json.
readonly DATASET_DIR="/data/qingq/PathVLM/baselines/github/SlideChat/dataset/stage_2_vqa_by_tumor/stage2_vqa_tumor_"
readonly SCRIPT_DIR="configs/slidechat/experiments"
readonly DEEPSPEED_CONFIG="configs/deepspeed/deepspeed_zero2.json"
readonly OUTPUT_BASE="/data/qingq/PathVLM/baselines/github/SlideChat/models/outputs"

# Suffix for your work-dirs
readonly WORKDIR_SUFFIX="original_2048maxlength_train_token_reducer_attn"

# ------------------------------------------------------------------
# DISCOVER DATASETS
# ------------------------------------------------------------------

# With nullglob an unmatched pattern expands to nothing instead of staying
# as the literal string ".../*.json", so an empty or missing dataset
# directory is detected here rather than looping once over a bogus path.
shopt -s nullglob
json_paths=("${DATASET_DIR}"/*.json)
shopt -u nullglob

if (( ${#json_paths[@]} == 0 )); then
  printf 'No dataset JSON files found in: %s\n' "${DATASET_DIR}" >&2
  exit 1
fi

# ------------------------------------------------------------------
# LOOP
# ------------------------------------------------------------------

for json_path in "${json_paths[@]}"; do
  # extract e.g. "BLCA" then lowercase -> "blca"
  tumor="$(basename -- "${json_path}" .json)"
  tumor_lc="$(tr '[:upper:]' '[:lower:]' <<<"${tumor}")"

  cfg="${SCRIPT_DIR}/stage2_reducer_attn_${tumor_lc}.py"
  workdir="${OUTPUT_BASE}/${tumor_lc}_${WORKDIR_SUFFIX}"

  # A dataset without a matching config is not fatal: warn and move on.
  if [[ ! -f "${cfg}" ]]; then
    echo "⚠️ Config not found for ${tumor_lc}, skipping: ${cfg}" >&2
    continue
  fi

  echo "🚀 Starting SFT on ${tumor_lc}"

  # Runs in the foreground; under `set -e` a failing run aborts the script,
  # so later tumor types are not attempted.
  # NOTE(review): --local_rank is forced to 0 although NPROC_PER_NODE=4;
  # confirm this is intended and not overriding a per-process rank.
  xtuner train \
    "${cfg}" \
    --deepspeed "${DEEPSPEED_CONFIG}" \
    --work-dir "${workdir}" \
    --local_rank 0

  echo "✅ Finished ${tumor_lc}"
  echo
done

# Reached only after every non-skipped run above has finished successfully.
echo "🎉 All jobs submitted."