# Source: pure_model_weights/code/xtuner/scripts/acmil_training_script.sh
# Uploaded by WinstonHu in commit e5e24c9 (verified): "Upload folder xtuner to code/xtuner"
# File size: 1.96 kB
#!/usr/bin/env bash
# Strict mode: stop on the first failing command, on any reference to an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# —––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# CONFIGURATION
# —––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# GPU selection and process count. Both are exported so that every child
# process started by this script inherits them; keep the process count in
# step with the number of device ids listed.
export CUDA_VISIBLE_DEVICES="6,7" NPROC_PER_NODE=2
# Absolute locations: where the per-tumor *.json datasets are looked up, and
# the parent directory under which each run's work-dir is created.
DATASET_DIR="/data/qingq/PathVLM/baselines/github/SlideChat/dataset/stage_2_vqa_by_tumor/stage2_vqa_tumor_"
OUTPUT_BASE="/data/qingq/PathVLM/baselines/github/SlideChat/models/outputs"
# Relative locations, resolved against the directory the script is run from:
# the folder holding the per-tumor training configs and the DeepSpeed config.
SCRIPT_DIR="configs/slidechat/experiments_acmil"
DEEPSPEED_CONFIG="configs/deepspeed/deepspeed_zero2.json"
# Appended to the lower-cased tumor name to form each work-dir name.
WORKDIR_SUFFIX="original_2048maxlength_acmil"
# —––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# LOOP
# —––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
# Runs one `xtuner train` job per *.json file found directly inside
# DATASET_DIR, one after another (each run finishes before the next starts).
#
# For every dataset file, the lower-cased basename <name> selects
#   config   : ${SCRIPT_DIR}/stage_2_acmil_<name>.py
#   work-dir : ${OUTPUT_BASE}/<name>_${WORKDIR_SUFFIX}
# Datasets without a matching config are skipped with a warning on stderr.
# With the file's `set -e` in effect, a failing `xtuner train` aborts the
# whole script. Exits 1 if no training job ran at all, so an empty or
# mistyped dataset location is not reported as success.
#
# NOTE(review): DATASET_DIR ends in "stage2_vqa_tumor_", which reads like a
# filename prefix rather than a directory name. If the files are actually named
# "stage2_vqa_tumor_<TUMOR>.json", this glob matches nothing and the prefix
# would have to be stripped from the basename — confirm the on-disk layout.
datasets_found=0
jobs_run=0

for json_path in "${DATASET_DIR}"/*.json; do
  # Without nullglob an unmatched pattern is handed over literally; drop that
  # bogus entry instead of treating "*" as a tumor name.
  [[ -e "${json_path}" ]] || continue
  datasets_found=$((datasets_found + 1))

  # extract e.g. "BLCA" then lowercase -> "blca"
  tumor="$(basename -- "${json_path}" .json)"
  tumor_lc="$(printf '%s' "${tumor}" | tr '[:upper:]' '[:lower:]')"
  cfg="${SCRIPT_DIR}/stage_2_acmil_${tumor_lc}.py"
  workdir="${OUTPUT_BASE}/${tumor_lc}_${WORKDIR_SUFFIX}"

  if [[ ! -f "${cfg}" ]]; then
    echo "⚠️ Config not found for ${tumor_lc}, skipping: ${cfg}" >&2
    continue
  fi

  echo "🚀 Starting SFT on ${tumor_lc}"
  # NOTE(review): --local_rank 0 is passed through unchanged from the original
  # invocation; whether it is still needed alongside NPROC_PER_NODE should be
  # confirmed against the installed xtuner version.
  xtuner train \
    "${cfg}" \
    --deepspeed "${DEEPSPEED_CONFIG}" \
    --work-dir "${workdir}" \
    --local_rank 0
  jobs_run=$((jobs_run + 1))
  echo "✅ Finished ${tumor_lc}"
  echo
done

# Refuse to report success when nothing was trained.
if ((jobs_run == 0)); then
  if ((datasets_found == 0)); then
    echo "ERROR: no *.json dataset files found in: ${DATASET_DIR}" >&2
  else
    echo "ERROR: ${datasets_found} dataset file(s) found, but none has a config in: ${SCRIPT_DIR}" >&2
  fi
  exit 1
fi

echo "🎉 All jobs submitted."