#!/usr/bin/env bash
set -euo pipefail
# ----------------------------------------------------------------
# CONFIGURATION
# ----------------------------------------------------------------
# GPUs and DDP settings
export CUDA_VISIBLE_DEVICES="4,5,6,7"
export NPROC_PER_NODE=4
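
# Sanity check (a minimal sketch, assuming NPROC_PER_NODE is meant to match
# the number of GPUs listed in CUDA_VISIBLE_DEVICES above): warn on a
# mismatch so DDP is not silently under- or over-subscribed.
NUM_GPUS="$(awk -F',' '{print NF}' <<< "${CUDA_VISIBLE_DEVICES}")"
if [[ "${NUM_GPUS}" -ne "${NPROC_PER_NODE}" ]]; then
    echo "⚠️ NPROC_PER_NODE (${NPROC_PER_NODE}) does not match the number of visible GPUs (${NUM_GPUS})" >&2
fi
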
# Paths
DATASET_DIR="/data/qingq/PathVLM/baselines/github/SlideChat/dataset/stage_2_vqa_by_tumor/stage2_vqa_tumor_"
SCRIPT_DIR="configs/slidechat/experiments"
DEEPSPEED_CONFIG="configs/deepspeed/deepspeed_zero2.json"
OUTPUT_BASE="/data/qingq/PathVLM/baselines/github/SlideChat/models/outputs"
# Suffix for your work-dirs
WORKDIR_SUFFIX="original_2048maxlength_train_token_reducer_attn"
# ----------------------------------------------------------------
# LOOP
# ----------------------------------------------------------------
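# Pre-flight check (a minimal sketch, assuming DATASET_DIR is an existing
# directory of per-tumor JSON files, as the glob below expects): fail fast
# instead of iterating over an unexpanded pattern.
if [[ ! -d "${DATASET_DIR}" ]]; then
    echo "❌ Dataset directory not found: ${DATASET_DIR}" >&2
    exit 1
fi
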
for json_path in "${DATASET_DIR}"/*.json; do
    # Extract the tumor name, e.g. "BLCA", then lowercase -> "blca"
    tumor="$(basename "${json_path}" .json)"
    tumor_lc="$(echo "${tumor}" | tr '[:upper:]' '[:lower:]')"

    cfg="${SCRIPT_DIR}/stage2_reducer_attn_${tumor_lc}.py"
    workdir="${OUTPUT_BASE}/${tumor_lc}_${WORKDIR_SUFFIX}"

    if [[ ! -f "${cfg}" ]]; then
        echo "⚠️ Config not found for ${tumor_lc}, skipping: ${cfg}"
        continue
    fi
echo "πŸš€ Starting SFT on ${tumor_lc}"
xtuner train \
"${cfg}" \
--deepspeed "${DEEPSPEED_CONFIG}" \
--work-dir "${workdir}" \
--local_rank 0
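    # Optional: append `2>&1 | tee "${workdir}/train.log"` to the xtuner
    # command above to also keep a per-tumor training log on disk.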
echo "βœ… Finished ${tumor_lc}"
echo
done
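
# Optional summary (a sketch, assuming each run wrote its outputs under
# OUTPUT_BASE using the WORKDIR_SUFFIX naming above): list the work
# directories produced by this script.
ls -d "${OUTPUT_BASE}"/*_"${WORKDIR_SUFFIX}" 2>/dev/null || true
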
echo "πŸŽ‰ All jobs submitted."