diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/20250925_202658.log b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/20250925_202658.log new file mode 100644 index 0000000000000000000000000000000000000000..e5cc3cf1983847a5ff12bb48318b7446393694d1 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/20250925_202658.log @@ -0,0 +1,2319 @@ +2025/09/25 20:26:58 - mmengine - INFO - +------------------------------------------------------------ +System environment: + sys.platform: linux + Python: 3.11.2 (main, May 2 2024, 11:59:08) [GCC 12.2.0] + CUDA available: True + MUSA available: False + numpy_random_seed: 145980489 + GPU 0,1,2,3,4,5,6,7: NVIDIA H100 80GB HBM3 + CUDA_HOME: /usr/local/cuda + NVCC: Cuda compilation tools, release 12.4, V12.4.131 + GCC: x86_64-linux-gnu-gcc (Debian 12.2.0-14) 12.2.0 + PyTorch: 2.4.1 + PyTorch compiling details: PyTorch built with: + - GCC 12.2 + - C++ Version: 201703 + - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications + - Intel(R) MKL-DNN v3.4.2 (Git Hash 1137e04ec0b5251ca2b4400a4fd3c667ce843d67) + - OpenMP 201511 (a.k.a. 
OpenMP 4.5) + - LAPACK is enabled (usually provided by MKL) + - NNPACK is enabled + - CPU capability usage: AVX512 + - CUDA Runtime 12.4 + - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90 + - CuDNN 90.4 (built against CUDA 12.6) + - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.4, CUDNN_VERSION=9.4.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0 -Wno-uninitialized -Wno-maybe-uninitialized -Wno-nonnull -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, + + TorchVision: 0.19.1+cu124 + OpenCV: 4.10.0 + MMEngine: 0.10.7 + +Runtime environment: + launcher: pytorch + randomness: {'deterministic': False, 'seed': None} + cudnn_benchmark: False + dist_cfg: {'backend': 'nccl'} + mp_cfg: 
{'mp_start_method': 'fork', 'opencv_num_threads': 0} + deterministic: False + seed: None + Distributed launcher: pytorch + Distributed training: True + GPU number: 8 +------------------------------------------------------------ + +2025/09/25 20:26:59 - mmengine - INFO - Config: +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = None +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + 
llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-06, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] +per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' 
+save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2a_easy.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), 
+ num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a' + +2025/09/25 20:27:01 - mmengine - WARNING - Failed to search registry with scope "mmengine" in the "builder" registry tree. As a workaround, the current "builder" registry in "xtuner" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "mmengine" is a correct scope, or whether the registry is initialized. +2025/09/25 20:27:03 - mmengine - INFO - Hooks will be executed in the following order: +before_run: +(VERY_HIGH ) RuntimeInfoHook +(55 ) ThroughputHook +(BELOW_NORMAL) LoggerHook + -------------------- +before_train: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(NORMAL ) DatasetInfoHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +before_train_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(NORMAL ) DistSamplerSeedHook + -------------------- +before_train_iter: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook + -------------------- +after_train_iter: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(55 ) ThroughputHook +(BELOW_NORMAL) LoggerHook +(LOW ) ParamSchedulerHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +after_train_epoch: +(NORMAL ) IterTimerHook +(LOW ) ParamSchedulerHook +(VERY_LOW ) CheckpointHook + -------------------- +before_val: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) DatasetInfoHook + -------------------- +before_val_epoch: +(NORMAL ) IterTimerHook + -------------------- +before_val_iter: +(NORMAL ) IterTimerHook + -------------------- +after_val_iter: +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + 
-------------------- +after_val_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook +(LOW ) ParamSchedulerHook +(VERY_LOW ) CheckpointHook + -------------------- +after_val: +(VERY_HIGH ) RuntimeInfoHook +(LOW ) EvaluateChatHook + -------------------- +after_train: +(VERY_HIGH ) RuntimeInfoHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +before_test: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) DatasetInfoHook + -------------------- +before_test_epoch: +(NORMAL ) IterTimerHook + -------------------- +before_test_iter: +(NORMAL ) IterTimerHook + -------------------- +after_test_iter: +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_test_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_test: +(VERY_HIGH ) RuntimeInfoHook + -------------------- +after_run: +(BELOW_NORMAL) LoggerHook + -------------------- +2025/09/25 20:27:05 - mmengine - INFO - Loading unwanted prefixes from: /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv +2025/09/25 20:27:05 - mmengine - INFO - Loaded 210 prefixes to filter out. +2025/09/25 20:27:05 - mmengine - INFO - Filtered out 1009 samples. +2025/09/25 20:27:05 - mmengine - INFO - [DEBUG] dataset full size used. +2025/09/25 20:27:06 - mmengine - INFO - xtuner_dataset_timeout = 1:00:00 +2025/09/25 20:28:35 - mmengine - WARNING - Dataset LLaVADataset has no metainfo. ``dataset_meta`` in visualizer will be None. 
+2025/09/25 20:28:36 - mmengine - INFO - train_stage == 2 +2025/09/25 20:29:06 - mmengine - INFO - using simple Resampler with 100 latents +2025/09/25 20:29:40 - mmengine - INFO - enable projector input require grads +2025/09/25 20:29:40 - mmengine - INFO - enable input required grads for projector +2025/09/25 20:29:40 - mmengine - INFO - Building lora +2025/09/25 20:29:44 - mmengine - INFO - loading token_merge from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors +2025/09/25 20:29:44 - mmengine - INFO - Loading projector from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors +2025/09/25 20:29:44 - mmengine - INFO - Loading resampler from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.embed_tokens.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - 
mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.0.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - 
mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.1.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - 
mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.2.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - 
mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.3.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine 
- WARNING - llm.base_model.model.model.layers.4.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.4.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - 
WARNING - llm.base_model.model.model.layers.5.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.5.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.6.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - 
llm.base_model.model.model.layers.6.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.7.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - 
llm.base_model.model.model.layers.8.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.8.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - 
llm.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.9.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - 
llm.base_model.model.model.layers.10.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.10.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - 
WARNING - llm.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.11.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - 
mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.12.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 
20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.13.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False 
+2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.14.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.o_proj.base_layer.weight is skipped since its 
requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.15.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.gate_proj.base_layer.weight is skipped 
since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.16.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.up_proj.base_layer.weight is 
skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.17.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - 
llm.base_model.model.model.layers.18.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.18.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - 
WARNING - llm.base_model.model.model.layers.19.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.19.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.20.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - 
WARNING - llm.base_model.model.model.layers.20.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.21.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - 
mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.22.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 
20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.23.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False 
+2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.24.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.k_proj.base_layer.weight is skipped since its 
requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.25.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias is skipped 
since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.26.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.v_proj.base_layer.weight 
is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.layers.27.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.model.norm.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - WARNING - llm.base_model.model.lm_head.weight is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- token_merge.ln_in.weight:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- token_merge.ln_in.bias:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- token_merge.ln_out.weight:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- token_merge.ln_out.bias:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- projector.model.0.bias:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- projector.model.2.bias:weight_decay=0.0 +2025/09/25 20:29:51 - 
mmengine - WARNING - resampler.query_pos_embed is skipped since its requires_grad=False +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.attn.out_proj.bias:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.ln_q.weight:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.ln_q.bias:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.ln_kv.weight:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.ln_kv.bias:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.ln_post.weight:weight_decay=0.0 +2025/09/25 20:29:51 - mmengine - INFO - paramwise_options -- resampler.ln_post.bias:weight_decay=0.0 +2025/09/25 20:30:08 - mmengine - INFO - Num train samples 59612 +2025/09/25 20:30:08 - mmengine - INFO - train example: +2025/09/25 20:30:09 - mmengine - INFO - <|im_start|>user + +Can you describe the pattern of tumor infiltration observed in this specimen? A) Tumor cells are primarily invading stroma without vascular or neural invasion. B) Tumor cells show extensive vascular and neural infiltration. C) Tumor cells remain confined to well-defined structures without invasive behavior.<|im_end|> +<|im_start|>assistant +A) Tumor cells are primarily invading stroma without vascular or neural invasion.<|im_end|> + +2025/09/25 20:30:09 - mmengine - INFO - before_train in EvaluateChatHook. 
+2025/09/25 20:30:09 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:30:10 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern within the slide.<|im_end|> + +2025/09/25 20:30:10 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:30:14 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report for the whole slide image of the breast biopsy specimen is as follows: + +1. **Diagnosis:** Invasive ductal carcinoma of the breast, grade 2, with features of ductal carcinoma in situ (DCIS). + +2. + +2025/09/25 20:30:14 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:30:18 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +Based on the features described, the most likely histological classification of the tumor is: + +A) Poorly differentiated keratinizing squamous cell carcinoma + +This classification is consistent with the presence of keratin pearls, which are a characteristic feature of keratin + +2025/09/25 20:30:18 - mmengine - INFO - [check] projector params NOT in optimizer: ['model.0.weight', 'model.0.bias', 'model.2.weight', 'model.2.bias'] +2025/09/25 20:30:18 - mmengine - INFO - [Trainable] 407,778,304 / 4,761,108,992 params (8.56%) +2025/09/25 20:30:18 - mmengine - INFO - llm.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.up_proj.lora_A.default.weight 
shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.5.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.down_proj.lora_A.default.weight 
shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.10.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.11.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +2025/09/25 20:30:18 - mmengine - INFO - ... (212 more trainable tensors not shown) +2025/09/25 20:30:18 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io +2025/09/25 20:30:18 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. +2025/09/25 20:30:18 - mmengine - INFO - Checkpoints will be saved to /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a. +2025/09/25 20:30:29 - mmengine - INFO - Iter(train) [ 10/7452] base_lr: 4.3952e-08 lr: 4.3952e-08 eta: 2:13:41 time: 1.0588 data_time: 0.0083 memory: 15740 loss: 2.1562 tflops: 3.5971 tokens_per_sec: 59.5002 +2025/09/25 20:30:40 - mmengine - INFO - Iter(train) [ 20/7452] base_lr: 7.0565e-08 lr: 7.0565e-08 eta: 2:13:52 time: 1.0318 data_time: 0.0079 memory: 15740 loss: 0.3262 tflops: 7.5651 tokens_per_sec: 125.0253 +2025/09/25 20:30:51 - mmengine - INFO - Iter(train) [ 30/7452] base_lr: 9.7177e-08 lr: 9.7177e-08 eta: 2:13:00 time: 1.0425 data_time: 0.0077 memory: 15740 loss: 1.6875 tflops: 4.0018 tokens_per_sec: 66.1884 +2025/09/25 20:31:02 - mmengine - INFO - Iter(train) [ 40/7452] base_lr: 1.2379e-07 lr: 1.2379e-07 eta: 2:13:03 time: 1.0456 data_time: 0.0079 memory: 15739 loss: 1.7734 tflops: 4.2215 tokens_per_sec: 69.8196 +2025/09/25 20:31:13 - mmengine - INFO - Iter(train) [ 50/7452] base_lr: 1.5040e-07 lr: 1.5040e-07 eta: 2:15:23 time: 1.0617 data_time: 0.0084 memory: 15742 loss: 0.5547 tflops: 6.4388 tokens_per_sec: 106.4348 +2025/09/25 20:31:24 - mmengine - INFO - Iter(train) [ 60/7452] base_lr: 1.7702e-07 lr: 1.7702e-07 eta: 2:14:33 time: 1.0494 data_time: 0.0083 memory: 15740 loss: 0.5938 tflops: 6.0522 tokens_per_sec: 100.0539 
+2025/09/25 20:31:35 - mmengine - INFO - Iter(train) [ 70/7452] base_lr: 2.0363e-07 lr: 2.0363e-07 eta: 2:15:16 time: 1.0531 data_time: 0.0084 memory: 16130 loss: 0.3125 tflops: 5.9737 tokens_per_sec: 98.7585 +2025/09/25 20:31:47 - mmengine - INFO - Iter(train) [ 80/7452] base_lr: 2.3024e-07 lr: 2.3024e-07 eta: 2:15:18 time: 1.0476 data_time: 0.0081 memory: 16130 loss: 0.3945 tflops: 6.7569 tokens_per_sec: 111.6866 +2025/09/25 20:31:57 - mmengine - INFO - Iter(train) [ 90/7452] base_lr: 2.5685e-07 lr: 2.5685e-07 eta: 2:14:58 time: 1.1176 data_time: 0.0087 memory: 16131 loss: 2.5312 tflops: 5.0868 tokens_per_sec: 84.1062 +2025/09/25 20:32:09 - mmengine - INFO - Iter(train) [ 100/7452] base_lr: 2.8347e-07 lr: 2.8347e-07 eta: 2:14:50 time: 1.0414 data_time: 0.0085 memory: 16130 loss: 0.2812 tflops: 6.5060 tokens_per_sec: 107.5459 +2025/09/25 20:32:20 - mmengine - INFO - Iter(train) [ 110/7452] base_lr: 3.1008e-07 lr: 3.1008e-07 eta: 2:15:14 time: 1.0598 data_time: 0.0088 memory: 16131 loss: 0.9375 tflops: 5.6498 tokens_per_sec: 93.4102 +2025/09/25 20:32:31 - mmengine - INFO - Iter(train) [ 120/7452] base_lr: 3.3669e-07 lr: 3.3669e-07 eta: 2:15:05 time: 1.1667 data_time: 0.0083 memory: 16131 loss: 1.2656 tflops: 3.2644 tokens_per_sec: 53.9973 +2025/09/25 20:32:43 - mmengine - INFO - Iter(train) [ 130/7452] base_lr: 3.6331e-07 lr: 3.6331e-07 eta: 2:15:30 time: 1.0905 data_time: 0.0086 memory: 16130 loss: 1.7500 tflops: 2.3832 tokens_per_sec: 39.4308 +2025/09/25 20:32:54 - mmengine - INFO - Iter(train) [ 140/7452] base_lr: 3.8992e-07 lr: 3.8992e-07 eta: 2:15:30 time: 1.2434 data_time: 0.0081 memory: 16131 loss: 0.1992 tflops: 6.0337 tokens_per_sec: 99.7241 +2025/09/25 20:33:05 - mmengine - INFO - Iter(train) [ 150/7452] base_lr: 4.1653e-07 lr: 4.1653e-07 eta: 2:14:55 time: 1.0202 data_time: 0.0083 memory: 16130 loss: 1.8516 tflops: 5.2163 tokens_per_sec: 86.2551 +2025/09/25 20:33:16 - mmengine - INFO - Iter(train) [ 160/7452] base_lr: 4.4315e-07 lr: 4.4315e-07 eta: 
2:14:38 time: 1.0395 data_time: 0.0078 memory: 16130 loss: 1.7891 tflops: 3.8387 tokens_per_sec: 63.4933 +2025/09/25 20:33:27 - mmengine - INFO - Iter(train) [ 170/7452] base_lr: 4.6976e-07 lr: 4.6976e-07 eta: 2:14:32 time: 1.0166 data_time: 0.0085 memory: 16131 loss: 2.4375 tflops: 4.1631 tokens_per_sec: 68.8567 +2025/09/25 20:33:38 - mmengine - INFO - Iter(train) [ 180/7452] base_lr: 4.9637e-07 lr: 4.9637e-07 eta: 2:14:07 time: 1.0778 data_time: 0.0083 memory: 16131 loss: 0.3613 tflops: 6.7361 tokens_per_sec: 111.3388 +2025/09/25 20:33:49 - mmengine - INFO - Iter(train) [ 190/7452] base_lr: 5.2298e-07 lr: 5.2298e-07 eta: 2:13:57 time: 1.0407 data_time: 0.0081 memory: 16131 loss: 0.7500 tflops: 6.6854 tokens_per_sec: 110.5070 +2025/09/25 20:34:00 - mmengine - INFO - Iter(train) [ 200/7452] base_lr: 5.4960e-07 lr: 5.4960e-07 eta: 2:13:59 time: 1.0235 data_time: 0.0084 memory: 16131 loss: 0.8438 tflops: 6.2053 tokens_per_sec: 102.5855 +2025/09/25 20:34:11 - mmengine - INFO - Iter(train) [ 210/7452] base_lr: 5.7621e-07 lr: 5.7621e-07 eta: 2:13:41 time: 1.2279 data_time: 0.0084 memory: 16130 loss: 0.3066 tflops: 5.1724 tokens_per_sec: 85.5089 +2025/09/25 20:34:22 - mmengine - INFO - Iter(train) [ 220/7452] base_lr: 6.0282e-07 lr: 6.0282e-07 eta: 2:13:24 time: 1.0067 data_time: 0.0081 memory: 16132 loss: 0.4141 tflops: 6.9710 tokens_per_sec: 115.2278 +2025/09/25 20:34:33 - mmengine - INFO - Iter(train) [ 230/7452] base_lr: 6.2944e-07 lr: 6.2944e-07 eta: 2:13:24 time: 1.0299 data_time: 0.0086 memory: 16131 loss: 1.3047 tflops: 4.0505 tokens_per_sec: 66.9953 +2025/09/25 20:34:44 - mmengine - INFO - Iter(train) [ 240/7452] base_lr: 6.5605e-07 lr: 6.5605e-07 eta: 2:13:07 time: 1.0510 data_time: 0.0088 memory: 16131 loss: 2.0938 tflops: 6.6775 tokens_per_sec: 110.3750 +2025/09/25 20:34:55 - mmengine - INFO - Iter(train) [ 250/7452] base_lr: 6.8266e-07 lr: 6.8266e-07 eta: 2:12:55 time: 1.0274 data_time: 0.0082 memory: 16130 loss: 1.4062 tflops: 3.5894 tokens_per_sec: 59.3741 
+2025/09/25 20:35:07 - mmengine - INFO - Iter(train) [ 260/7452] base_lr: 7.0927e-07 lr: 7.0927e-07 eta: 2:13:03 time: 1.2792 data_time: 0.0078 memory: 16131 loss: 1.9844 tflops: 4.8702 tokens_per_sec: 80.5162 +2025/09/25 20:35:18 - mmengine - INFO - Iter(train) [ 270/7452] base_lr: 7.3589e-07 lr: 7.3589e-07 eta: 2:12:41 time: 1.1923 data_time: 0.0082 memory: 16131 loss: 0.2949 tflops: 6.6994 tokens_per_sec: 110.7148 +2025/09/25 20:35:29 - mmengine - INFO - Iter(train) [ 280/7452] base_lr: 7.6250e-07 lr: 7.6250e-07 eta: 2:12:34 time: 1.2522 data_time: 0.0082 memory: 16131 loss: 2.5000 tflops: 4.5886 tokens_per_sec: 75.8678 +2025/09/25 20:35:40 - mmengine - INFO - Iter(train) [ 290/7452] base_lr: 7.8911e-07 lr: 7.8911e-07 eta: 2:12:32 time: 1.0202 data_time: 0.0077 memory: 16131 loss: 0.7539 tflops: 6.4036 tokens_per_sec: 105.8598 +2025/09/25 20:35:51 - mmengine - INFO - Iter(train) [ 300/7452] base_lr: 8.1573e-07 lr: 8.1573e-07 eta: 2:12:10 time: 1.0804 data_time: 0.0083 memory: 16130 loss: 2.7812 tflops: 4.8137 tokens_per_sec: 79.6001 +2025/09/25 20:36:02 - mmengine - INFO - Iter(train) [ 310/7452] base_lr: 8.4234e-07 lr: 8.4234e-07 eta: 2:12:04 time: 1.2501 data_time: 0.0079 memory: 16130 loss: 0.5273 tflops: 4.9355 tokens_per_sec: 81.5961 +2025/09/25 20:36:14 - mmengine - INFO - Iter(train) [ 320/7452] base_lr: 8.6895e-07 lr: 8.6895e-07 eta: 2:12:04 time: 1.3180 data_time: 0.0084 memory: 16131 loss: 0.1465 tflops: 7.2564 tokens_per_sec: 119.8782 +2025/09/25 20:36:25 - mmengine - INFO - Iter(train) [ 330/7452] base_lr: 8.9556e-07 lr: 8.9556e-07 eta: 2:11:43 time: 1.0330 data_time: 0.0082 memory: 16130 loss: 0.2676 tflops: 6.9108 tokens_per_sec: 114.2289 +2025/09/25 20:36:36 - mmengine - INFO - Iter(train) [ 340/7452] base_lr: 9.2218e-07 lr: 9.2218e-07 eta: 2:11:33 time: 1.0410 data_time: 0.0082 memory: 16132 loss: 0.5859 tflops: 6.2178 tokens_per_sec: 102.7896 +2025/09/25 20:36:47 - mmengine - INFO - Iter(train) [ 350/7452] base_lr: 9.4879e-07 lr: 9.4879e-07 eta: 
2:11:25 time: 1.0288 data_time: 0.0088 memory: 16131 loss: 0.2031 tflops: 6.3501 tokens_per_sec: 104.9758 +2025/09/25 20:36:58 - mmengine - INFO - Iter(train) [ 360/7452] base_lr: 9.7540e-07 lr: 9.7540e-07 eta: 2:11:02 time: 1.0657 data_time: 0.0089 memory: 16131 loss: 0.1406 tflops: 6.9262 tokens_per_sec: 114.4778 +2025/09/25 20:37:09 - mmengine - INFO - Iter(train) [ 370/7452] base_lr: 1.0020e-06 lr: 1.0020e-06 eta: 2:10:50 time: 1.0952 data_time: 0.0084 memory: 16131 loss: 1.8594 tflops: 7.9576 tokens_per_sec: 131.4860 +2025/09/25 20:37:20 - mmengine - INFO - Iter(train) [ 380/7452] base_lr: 1.0286e-06 lr: 1.0286e-06 eta: 2:10:47 time: 1.0826 data_time: 0.0084 memory: 16131 loss: 2.0938 tflops: 6.7062 tokens_per_sec: 110.8441 +2025/09/25 20:37:31 - mmengine - INFO - Iter(train) [ 390/7452] base_lr: 1.0552e-06 lr: 1.0552e-06 eta: 2:10:33 time: 1.0399 data_time: 0.0083 memory: 16131 loss: 0.3066 tflops: 7.4475 tokens_per_sec: 123.0845 +2025/09/25 20:37:42 - mmengine - INFO - Iter(train) [ 400/7452] base_lr: 1.0819e-06 lr: 1.0819e-06 eta: 2:10:21 time: 1.0643 data_time: 0.0088 memory: 16131 loss: 0.2158 tflops: 7.3913 tokens_per_sec: 122.1515 +2025/09/25 20:37:53 - mmengine - INFO - Iter(train) [ 410/7452] base_lr: 1.1085e-06 lr: 1.1085e-06 eta: 2:10:11 time: 1.0673 data_time: 0.0092 memory: 16130 loss: 2.3594 tflops: 6.6317 tokens_per_sec: 109.6173 +2025/09/25 20:38:04 - mmengine - INFO - Iter(train) [ 420/7452] base_lr: 1.1351e-06 lr: 1.1351e-06 eta: 2:09:56 time: 1.0594 data_time: 0.0080 memory: 16131 loss: 2.5625 tflops: 4.3949 tokens_per_sec: 72.6823 +2025/09/25 20:38:15 - mmengine - INFO - Iter(train) [ 430/7452] base_lr: 1.1617e-06 lr: 1.1617e-06 eta: 2:09:49 time: 1.0521 data_time: 0.0082 memory: 16131 loss: 0.2598 tflops: 6.3250 tokens_per_sec: 104.5576 +2025/09/25 20:38:27 - mmengine - INFO - Iter(train) [ 440/7452] base_lr: 1.1883e-06 lr: 1.1883e-06 eta: 2:09:39 time: 1.0134 data_time: 0.0082 memory: 16131 loss: 0.2656 tflops: 6.6855 tokens_per_sec: 
110.5143 +2025/09/25 20:38:38 - mmengine - INFO - Iter(train) [ 450/7452] base_lr: 1.2149e-06 lr: 1.2149e-06 eta: 2:09:29 time: 1.0389 data_time: 0.0087 memory: 16131 loss: 0.1797 tflops: 7.9799 tokens_per_sec: 131.8677 +2025/09/25 20:38:49 - mmengine - INFO - Iter(train) [ 460/7452] base_lr: 1.2415e-06 lr: 1.2415e-06 eta: 2:09:23 time: 1.3634 data_time: 0.0083 memory: 16131 loss: 1.3594 tflops: 3.4592 tokens_per_sec: 57.2083 +2025/09/25 20:39:00 - mmengine - INFO - Iter(train) [ 470/7452] base_lr: 1.2681e-06 lr: 1.2681e-06 eta: 2:09:04 time: 1.0664 data_time: 0.0087 memory: 16130 loss: 2.6094 tflops: 4.3092 tokens_per_sec: 71.2666 +2025/09/25 20:39:11 - mmengine - INFO - Iter(train) [ 480/7452] base_lr: 1.2948e-06 lr: 1.2948e-06 eta: 2:08:51 time: 1.0413 data_time: 0.0084 memory: 16131 loss: 0.3086 tflops: 6.5067 tokens_per_sec: 107.5588 +2025/09/25 20:39:22 - mmengine - INFO - Iter(train) [ 490/7452] base_lr: 1.3214e-06 lr: 1.3214e-06 eta: 2:08:47 time: 1.4174 data_time: 0.0079 memory: 16131 loss: 0.3145 tflops: 6.0203 tokens_per_sec: 99.4787 +2025/09/25 20:39:33 - mmengine - INFO - Iter(train) [ 500/7452] base_lr: 1.3480e-06 lr: 1.3480e-06 eta: 2:08:31 time: 1.1121 data_time: 0.0086 memory: 16131 loss: 2.4375 tflops: 6.1472 tokens_per_sec: 101.6137 +2025/09/25 20:39:44 - mmengine - INFO - Iter(train) [ 510/7452] base_lr: 1.3746e-06 lr: 1.3746e-06 eta: 2:08:20 time: 1.1909 data_time: 0.0081 memory: 16131 loss: 1.6484 tflops: 5.0790 tokens_per_sec: 83.9718 +2025/09/25 20:39:47 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 20:39:47 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:39:48 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern within the slide.<|im_end|> + +2025/09/25 20:39:48 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:40:38 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report for the whole slide image of the breast tissue biopsy is as follows: + +1. **Diagnosis:** + - **Invasive Ductal Carcinoma (IDC) of the Breast:** The biopsy specimen demonstrates invasive ductal carcinoma of the breast, which is a type of breast cancer. The tumor cells are of ductal origin and have invaded the surrounding stroma. + +2. **Histological Features:** + - **Grade:** The tumor is classified as Grade 3, indicating a high-grade malignancy. This grade suggests a more aggressive tumor with a higher potential for rapid growth and metastasis. + - **Size:** The tumor measures approximately 2.5 cm in its largest dimension. + - **Margins:** The surgical margins are free of tumor, indicating that the entire tumor has been excised. + - **Invasion:** The tumor shows extensive invasion into the surrounding breast tissue, involving the dermal and subcutaneous fat layers. + - **Lymphovascular Invasion:** There is evidence of lymphovascular invasion, which is a significant finding as it suggests the potential for metastasis via the lymphatic system. 
+ - **Nuclear Features:** The tumor cells show marked pleomorphism, with large, irregular nuclei and prominent nucleoli, indicative of a high-grade tumor. + - **Mitotic Activity:** There is a high mitotic activity, with more than 10 mitoses per 10 high-power fields (HPF), which is a strong indicator of a high-grade tumor. + - **Proliferation Index:** The Ki-67 proliferation index is 60-70%, indicating a high rate of cell proliferation. + - **Infiltration:** The tumor shows extensive infiltration of the breast tissue, involving the dermis and subcutaneous fat. + +3. **Microscopic Findings:** + - **Tumor Cells:** The tumor cells are predominantly ductal in origin, with a poorly differentiated morphology, showing marked pleomorphism and anaplasia. + - **Stromal Changes:** The stroma surrounding the tumor shows reactive changes, with increased fibroblast activity and collagen deposition. + - **Inflammatory Response:** There is a significant inflammatory response, with a dense infiltrate of lymphocytes and macrophages around the tumor. + +4. **Additional Findings:** + - **No Evidence of:** + - **In Situ Carcinoma:** No evidence of ductal carcinoma in situ (DCIS) within the specimen. + - **Invasive Lobular Carcinoma:** No evidence of invasive lobular carcinoma. + - **Invasive Mucinous Carcinoma:** No evidence of mucinous carcinoma. + - **Invasive Medullary Carcinoma:** No evidence of medullary carcinoma. + - **Invasive Tubular Carcinoma:** No evidence of tubular carcinoma. + - **Invasive Papillary Carcinoma:** + +2025/09/25 20:40:38 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:40:57 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +Based on the features described, the most likely histological classification of the tumor is: + +A) Poorly differentiated keratinizing squamous cell carcinoma + +This classification is consistent with the presence of keratin pearls, which are a characteristic feature of keratinizing squamous cell carcinoma. Keratin pearls are small, round masses of keratin that form within the cytoplasm of squamous cells and are often seen in well-differentiated to poorly differentiated keratinizing squamous cell carcinomas. + +The other options are less likely given the presence of keratin pearls: + +- B) Moderately differentiated squamous cell carcinoma typically shows a mix of keratinizing and non-keratinizing areas, but usually not as well-differentiated as keratinizing squamous cell carcinoma. +- C) Well-differentiated squamous cell carcinoma usually shows more keratinization and less atypia. +- D) Adenocarcinoma is a different type of carcinoma that arises from glandular tissue, not squamous cells. 
+ +The presence of keratin pearls, along with the other features mentioned, strongly supports the diagnosis of poorly differentiated keratinizing squamous cell carcinoma.<|im_end|> + +2025/09/25 20:41:06 - mmengine - INFO - Iter(train) [ 520/7452] base_lr: 1.4012e-06 lr: 1.4012e-06 eta: 2:23:58 time: 1.0396 data_time: 0.0083 memory: 16131 loss: 1.9062 tflops: 8.1497 tokens_per_sec: 134.6671 +2025/09/25 20:41:17 - mmengine - INFO - Iter(train) [ 530/7452] base_lr: 1.4278e-06 lr: 1.4278e-06 eta: 2:23:23 time: 1.0447 data_time: 0.0081 memory: 16131 loss: 2.3438 tflops: 4.3987 tokens_per_sec: 72.7464 +2025/09/25 20:41:28 - mmengine - INFO - Iter(train) [ 540/7452] base_lr: 1.4544e-06 lr: 1.4544e-06 eta: 2:22:51 time: 1.1067 data_time: 0.0084 memory: 16131 loss: 2.5469 tflops: 7.1078 tokens_per_sec: 117.4670 +2025/09/25 20:41:39 - mmengine - INFO - Iter(train) [ 550/7452] base_lr: 1.4810e-06 lr: 1.4810e-06 eta: 2:22:21 time: 1.2615 data_time: 0.0084 memory: 16130 loss: 1.9609 tflops: 3.3549 tokens_per_sec: 55.4879 +2025/09/25 20:41:50 - mmengine - INFO - Iter(train) [ 560/7452] base_lr: 1.5077e-06 lr: 1.5077e-06 eta: 2:21:52 time: 1.0371 data_time: 0.0080 memory: 16130 loss: 0.2617 tflops: 7.5849 tokens_per_sec: 125.3516 +2025/09/25 20:42:01 - mmengine - INFO - Iter(train) [ 570/7452] base_lr: 1.5343e-06 lr: 1.5343e-06 eta: 2:21:20 time: 1.0406 data_time: 0.0086 memory: 16131 loss: 2.3594 tflops: 7.5007 tokens_per_sec: 123.9611 +2025/09/25 20:42:12 - mmengine - INFO - Iter(train) [ 580/7452] base_lr: 1.5609e-06 lr: 1.5609e-06 eta: 2:20:56 time: 1.0319 data_time: 0.0080 memory: 16130 loss: 0.2295 tflops: 7.1529 tokens_per_sec: 118.2238 +2025/09/25 20:42:23 - mmengine - INFO - Iter(train) [ 590/7452] base_lr: 1.5875e-06 lr: 1.5875e-06 eta: 2:20:26 time: 1.0612 data_time: 0.0102 memory: 16131 loss: 1.3516 tflops: 3.4749 tokens_per_sec: 57.4802 +2025/09/25 20:42:34 - mmengine - INFO - Iter(train) [ 600/7452] base_lr: 1.6141e-06 lr: 1.6141e-06 eta: 2:19:57 time: 1.0709 
data_time: 0.0089 memory: 16131 loss: 1.9062 tflops: 6.6095 tokens_per_sec: 109.2499 +2025/09/25 20:42:45 - mmengine - INFO - Iter(train) [ 610/7452] base_lr: 1.6407e-06 lr: 1.6407e-06 eta: 2:19:33 time: 1.0255 data_time: 0.0080 memory: 16130 loss: 3.0156 tflops: 4.3042 tokens_per_sec: 71.1862 +2025/09/25 20:42:56 - mmengine - INFO - Iter(train) [ 620/7452] base_lr: 1.6673e-06 lr: 1.6673e-06 eta: 2:19:08 time: 1.0109 data_time: 0.0087 memory: 16131 loss: 2.5625 tflops: 7.0017 tokens_per_sec: 115.7336 +2025/09/25 20:43:07 - mmengine - INFO - Iter(train) [ 630/7452] base_lr: 1.6940e-06 lr: 1.6940e-06 eta: 2:18:40 time: 1.0165 data_time: 0.0082 memory: 16131 loss: 0.4277 tflops: 6.8442 tokens_per_sec: 113.1333 +2025/09/25 20:43:18 - mmengine - INFO - Iter(train) [ 640/7452] base_lr: 1.7206e-06 lr: 1.7206e-06 eta: 2:18:21 time: 1.3150 data_time: 0.0085 memory: 16130 loss: 2.4844 tflops: 4.0470 tokens_per_sec: 66.9199 +2025/09/25 20:43:30 - mmengine - INFO - Iter(train) [ 650/7452] base_lr: 1.7472e-06 lr: 1.7472e-06 eta: 2:18:00 time: 1.0986 data_time: 0.0084 memory: 16131 loss: 1.0781 tflops: 4.0729 tokens_per_sec: 67.3602 +2025/09/25 20:43:41 - mmengine - INFO - Iter(train) [ 660/7452] base_lr: 1.7738e-06 lr: 1.7738e-06 eta: 2:17:39 time: 1.1228 data_time: 0.0084 memory: 16130 loss: 1.9297 tflops: 3.9312 tokens_per_sec: 65.0183 +2025/09/25 20:43:52 - mmengine - INFO - Iter(train) [ 670/7452] base_lr: 1.8004e-06 lr: 1.8004e-06 eta: 2:17:18 time: 1.0657 data_time: 0.0085 memory: 16130 loss: 1.1797 tflops: 3.9145 tokens_per_sec: 64.7451 +2025/09/25 20:44:04 - mmengine - INFO - Iter(train) [ 680/7452] base_lr: 1.8270e-06 lr: 1.8270e-06 eta: 2:16:59 time: 1.3634 data_time: 0.0080 memory: 16131 loss: 0.0947 tflops: 6.7480 tokens_per_sec: 111.4877 +2025/09/25 20:44:15 - mmengine - INFO - Iter(train) [ 690/7452] base_lr: 1.8536e-06 lr: 1.8536e-06 eta: 2:16:34 time: 1.2329 data_time: 0.0082 memory: 16131 loss: 0.4297 tflops: 4.3165 tokens_per_sec: 71.3756 +2025/09/25 20:44:26 
- mmengine - INFO - Iter(train) [ 700/7452] base_lr: 1.8802e-06 lr: 1.8802e-06 eta: 2:16:12 time: 1.2416 data_time: 0.0085 memory: 16130 loss: 1.6953 tflops: 2.6291 tokens_per_sec: 43.4932 +2025/09/25 20:44:37 - mmengine - INFO - Iter(train) [ 710/7452] base_lr: 1.9069e-06 lr: 1.9069e-06 eta: 2:15:52 time: 1.0682 data_time: 0.0082 memory: 16131 loss: 0.2871 tflops: 6.1726 tokens_per_sec: 102.0391 +2025/09/25 20:44:48 - mmengine - INFO - Iter(train) [ 720/7452] base_lr: 1.9335e-06 lr: 1.9335e-06 eta: 2:15:30 time: 1.2647 data_time: 0.0085 memory: 16131 loss: 2.6562 tflops: 3.8728 tokens_per_sec: 64.0454 +2025/09/25 20:45:00 - mmengine - INFO - Iter(train) [ 730/7452] base_lr: 1.9601e-06 lr: 1.9601e-06 eta: 2:15:14 time: 1.3937 data_time: 0.0081 memory: 16130 loss: 0.1963 tflops: 5.0354 tokens_per_sec: 83.2327 +2025/09/25 20:45:10 - mmengine - INFO - Iter(train) [ 740/7452] base_lr: 1.9867e-06 lr: 1.9867e-06 eta: 2:14:49 time: 1.0434 data_time: 0.0083 memory: 16131 loss: 0.1865 tflops: 8.1781 tokens_per_sec: 135.1348 +2025/09/25 20:45:22 - mmengine - INFO - Iter(train) [ 750/7452] base_lr: 2.0000e-06 lr: 2.0000e-06 eta: 2:14:30 time: 1.3108 data_time: 0.0085 memory: 16131 loss: 0.1621 tflops: 6.2322 tokens_per_sec: 102.9899 +2025/09/25 20:45:32 - mmengine - INFO - Iter(train) [ 760/7452] base_lr: 2.0000e-06 lr: 2.0000e-06 eta: 2:14:07 time: 1.0354 data_time: 0.0087 memory: 16130 loss: 2.1719 tflops: 4.6136 tokens_per_sec: 76.2981 +2025/09/25 20:45:44 - mmengine - INFO - Iter(train) [ 770/7452] base_lr: 2.0000e-06 lr: 2.0000e-06 eta: 2:13:49 time: 1.0586 data_time: 0.0083 memory: 16130 loss: 2.0000 tflops: 5.4277 tokens_per_sec: 89.7422 +2025/09/25 20:45:55 - mmengine - INFO - Iter(train) [ 780/7452] base_lr: 2.0000e-06 lr: 2.0000e-06 eta: 2:13:26 time: 1.0834 data_time: 0.0084 memory: 16131 loss: 2.2500 tflops: 4.9683 tokens_per_sec: 82.1523 +2025/09/25 20:46:06 - mmengine - INFO - Iter(train) [ 790/7452] base_lr: 2.0000e-06 lr: 2.0000e-06 eta: 2:13:06 time: 1.0138 
data_time: 0.0080 memory: 16131 loss: 0.4395 tflops: 5.4884 tokens_per_sec: 90.7501 +2025/09/25 20:46:17 - mmengine - INFO - Iter(train) [ 800/7452] base_lr: 1.9999e-06 lr: 1.9999e-06 eta: 2:12:46 time: 1.2393 data_time: 0.0082 memory: 16131 loss: 1.7812 tflops: 3.2198 tokens_per_sec: 53.2564 +2025/09/25 20:46:28 - mmengine - INFO - Iter(train) [ 810/7452] base_lr: 1.9999e-06 lr: 1.9999e-06 eta: 2:12:27 time: 1.0088 data_time: 0.0075 memory: 16131 loss: 0.3457 tflops: 6.7166 tokens_per_sec: 111.0274 +2025/09/25 20:46:38 - mmengine - INFO - Iter(train) [ 820/7452] base_lr: 1.9999e-06 lr: 1.9999e-06 eta: 2:12:02 time: 1.0559 data_time: 0.0081 memory: 16131 loss: 2.2969 tflops: 7.2778 tokens_per_sec: 120.2804 +2025/09/25 20:46:49 - mmengine - INFO - Iter(train) [ 830/7452] base_lr: 1.9998e-06 lr: 1.9998e-06 eta: 2:11:41 time: 1.0630 data_time: 0.0083 memory: 16131 loss: 2.4219 tflops: 7.4568 tokens_per_sec: 123.2327 +2025/09/25 20:47:00 - mmengine - INFO - Iter(train) [ 840/7452] base_lr: 1.9998e-06 lr: 1.9998e-06 eta: 2:11:27 time: 1.3299 data_time: 0.0086 memory: 16131 loss: 2.6406 tflops: 3.9107 tokens_per_sec: 64.6677 +2025/09/25 20:47:11 - mmengine - INFO - Iter(train) [ 850/7452] base_lr: 1.9997e-06 lr: 1.9997e-06 eta: 2:11:04 time: 1.0212 data_time: 0.0082 memory: 16131 loss: 2.5469 tflops: 4.6777 tokens_per_sec: 77.3579 +2025/09/25 20:47:22 - mmengine - INFO - Iter(train) [ 860/7452] base_lr: 1.9997e-06 lr: 1.9997e-06 eta: 2:10:44 time: 1.0232 data_time: 0.0080 memory: 16131 loss: 0.2197 tflops: 5.7341 tokens_per_sec: 94.8050 +2025/09/25 20:47:33 - mmengine - INFO - Iter(train) [ 870/7452] base_lr: 1.9996e-06 lr: 1.9996e-06 eta: 2:10:29 time: 1.3322 data_time: 0.0086 memory: 16131 loss: 1.6094 tflops: 3.9492 tokens_per_sec: 65.3036 +2025/09/25 20:47:44 - mmengine - INFO - Iter(train) [ 880/7452] base_lr: 1.9996e-06 lr: 1.9996e-06 eta: 2:10:06 time: 1.0334 data_time: 0.0089 memory: 16130 loss: 0.0947 tflops: 6.3219 tokens_per_sec: 104.5092 +2025/09/25 20:47:55 
- mmengine - INFO - Iter(train) [ 890/7452] base_lr: 1.9995e-06 lr: 1.9995e-06 eta: 2:09:48 time: 1.0321 data_time: 0.0089 memory: 16130 loss: 0.4062 tflops: 7.3276 tokens_per_sec: 121.1066 +2025/09/25 20:48:06 - mmengine - INFO - Iter(train) [ 900/7452] base_lr: 1.9994e-06 lr: 1.9994e-06 eta: 2:09:33 time: 1.0287 data_time: 0.0084 memory: 16131 loss: 1.2891 tflops: 3.7611 tokens_per_sec: 62.2123 +2025/09/25 20:48:17 - mmengine - INFO - Iter(train) [ 910/7452] base_lr: 1.9993e-06 lr: 1.9993e-06 eta: 2:09:13 time: 1.0364 data_time: 0.0084 memory: 16131 loss: 1.8516 tflops: 4.0252 tokens_per_sec: 66.5760 +2025/09/25 20:48:28 - mmengine - INFO - Iter(train) [ 920/7452] base_lr: 1.9993e-06 lr: 1.9993e-06 eta: 2:08:54 time: 1.0552 data_time: 0.0085 memory: 16130 loss: 2.3281 tflops: 6.4783 tokens_per_sec: 107.0874 +2025/09/25 20:48:39 - mmengine - INFO - Iter(train) [ 930/7452] base_lr: 1.9992e-06 lr: 1.9992e-06 eta: 2:08:37 time: 1.0064 data_time: 0.0081 memory: 16131 loss: 0.2598 tflops: 7.5753 tokens_per_sec: 125.1995 +2025/09/25 20:48:50 - mmengine - INFO - Iter(train) [ 940/7452] base_lr: 1.9991e-06 lr: 1.9991e-06 eta: 2:08:18 time: 1.0351 data_time: 0.0083 memory: 16131 loss: 1.7031 tflops: 3.8548 tokens_per_sec: 63.7602 +2025/09/25 20:49:01 - mmengine - INFO - Iter(train) [ 950/7452] base_lr: 1.9990e-06 lr: 1.9990e-06 eta: 2:08:03 time: 1.4202 data_time: 0.0083 memory: 16131 loss: 2.1406 tflops: 3.1932 tokens_per_sec: 52.8102 +2025/09/25 20:49:12 - mmengine - INFO - Iter(train) [ 960/7452] base_lr: 1.9989e-06 lr: 1.9989e-06 eta: 2:07:46 time: 1.3724 data_time: 0.0114 memory: 16131 loss: 1.8984 tflops: 3.8778 tokens_per_sec: 64.1225 +2025/09/25 20:49:23 - mmengine - INFO - Iter(train) [ 970/7452] base_lr: 1.9988e-06 lr: 1.9988e-06 eta: 2:07:27 time: 1.0398 data_time: 0.0084 memory: 16131 loss: 0.2139 tflops: 6.9241 tokens_per_sec: 114.4474 +2025/09/25 20:49:34 - mmengine - INFO - Iter(train) [ 980/7452] base_lr: 1.9987e-06 lr: 1.9987e-06 eta: 2:07:13 time: 1.0414 
data_time: 0.0086 memory: 16131 loss: 1.8359 tflops: 3.5992 tokens_per_sec: 59.5357 +2025/09/25 20:49:45 - mmengine - INFO - Iter(train) [ 990/7452] base_lr: 1.9985e-06 lr: 1.9985e-06 eta: 2:06:53 time: 1.0269 data_time: 0.0084 memory: 16131 loss: 0.1963 tflops: 6.8930 tokens_per_sec: 113.9369 +2025/09/25 20:49:56 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 20:49:56 - mmengine - INFO - Iter(train) [1000/7452] base_lr: 1.9984e-06 lr: 1.9984e-06 eta: 2:06:35 time: 1.0157 data_time: 0.0081 memory: 16130 loss: 0.1475 tflops: 7.3267 tokens_per_sec: 121.0948 +2025/09/25 20:50:07 - mmengine - INFO - Iter(train) [1010/7452] base_lr: 1.9983e-06 lr: 1.9983e-06 eta: 2:06:23 time: 1.2727 data_time: 0.0081 memory: 16131 loss: 2.3750 tflops: 4.8951 tokens_per_sec: 80.9279 +2025/09/25 20:50:18 - mmengine - INFO - Iter(train) [1020/7452] base_lr: 1.9982e-06 lr: 1.9982e-06 eta: 2:06:03 time: 1.0320 data_time: 0.0087 memory: 16131 loss: 3.0000 tflops: 7.0350 tokens_per_sec: 116.2793 +2025/09/25 20:50:23 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 20:50:23 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:50:24 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern within the slide.<|im_end|> + +2025/09/25 20:50:24 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:51:01 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report for the whole slide image of the breast biopsy specimen is as follows: + +1. **Diagnosis:** The whole slide image of the breast biopsy specimen is consistent with **atypical ductal hyperplasia (ADH)**. + +2. **Description of the Specimen:** + - **Tissue:** The specimen consists of a core biopsy of the breast tissue. + - **Size:** The core biopsy measures approximately 1.5 cm in diameter. + - **Location:** The biopsy was taken from the right breast, specifically from the upper outer quadrant. + +3. **Histological Features:** + - **Ductal Hyperplasia:** There is evidence of **atypical ductal hyperplasia** within the breast tissue. This is characterized by an increase in the number of ductal structures, with some of these showing architectural and cellular atypia. + - **Cellular Atypia:** The cells within the ducts show a degree of atypia, which is a hallmark of ADH. The atypia is characterized by increased nuclear size, irregular nuclear contours, and increased nuclear-cytoplasmic ratio. + - **Mitotic Activity:** There is a low to moderate number of mitotic figures, which is consistent with ADH. 
+ - **Inflammation:** There is no significant evidence of inflammation or necrosis within the specimen. + +4. **Implications:** + - **Risk of Breast Cancer:** Atypical ductal hyperplasia is considered a **precursor lesion** and is associated with an increased risk of developing breast cancer, particularly ductal carcinoma in situ (DCIS) or invasive ductal carcinoma. + - **Follow-Up:** The patient should be followed closely by a healthcare provider, and regular mammograms or other imaging studies may be recommended to monitor for any changes in the breast tissue. + +5. **Conclusion:** + - **Final Diagnosis:** The final diagnosis is **atypical ductal hyperplasia**. + +This report indicates that the biopsy specimen shows evidence of atypical ductal hyperplasia, which is a benign condition but carries an increased risk of developing breast cancer. The patient should be monitored closely for any changes in their breast tissue.<|im_end|> + +2025/09/25 20:51:01 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 20:51:24 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +Based on the provided features, the correct histological classification of the tumor appears to be: + +**B) Moderately differentiated squamous cell carcinoma** + +Here's the reasoning: + +1. **Cellularity and Pleomorphism**: The cells are moderately pleomorphic, indicating a degree of atypia that is characteristic of squamous cell carcinoma but not as pronounced as in poorly differentiated or poorly differentiated keratinizing squamous cell carcinoma. + +2. 
**Mitotic Activity**: The presence of mitotic figures (3-4 mitoses per 10 high-power fields) is a hallmark of moderately differentiated squamous cell carcinoma. Poorly differentiated squamous cell carcinoma typically shows a higher mitotic rate. + +3. **Cellular Arrangement**: The cells are arranged in sheets and cords, which is consistent with squamous cell carcinoma. Well-differentiated squamous cell carcinoma often shows more keratinization and a more organized architecture. + +4. **Nuclear Features**: The nuclei are moderately atypical, with some variation in size and shape, which is typical of moderately differentiated squamous cell carcinoma. + +5. **Mitotic Rate**: The mitotic rate of 3-4 per 10 high-power fields is within the range for moderately differentiated squamous cell carcinoma. + +Therefore, the most appropriate classification based on the given features is **B) Moderately differentiated squamous cell carcinoma**.<|im_end|> + +2025/09/25 20:51:42 - mmengine - INFO - Iter(train) [1030/7452] base_lr: 1.9980e-06 lr: 1.9980e-06 eta: 2:13:23 time: 1.3313 data_time: 0.0080 memory: 16131 loss: 1.7188 tflops: 6.4549 tokens_per_sec: 106.6594 +2025/09/25 20:51:53 - mmengine - INFO - Iter(train) [1040/7452] base_lr: 1.9979e-06 lr: 1.9979e-06 eta: 2:13:04 time: 1.2392 data_time: 0.0081 memory: 16131 loss: 0.2148 tflops: 4.5388 tokens_per_sec: 75.0466 +2025/09/25 20:52:04 - mmengine - INFO - Iter(train) [1050/7452] base_lr: 1.9977e-06 lr: 1.9977e-06 eta: 2:12:40 time: 1.0009 data_time: 0.0079 memory: 16131 loss: 1.9531 tflops: 7.5562 tokens_per_sec: 124.8853 +2025/09/25 20:52:15 - mmengine - INFO - Iter(train) [1060/7452] base_lr: 1.9976e-06 lr: 1.9976e-06 eta: 2:12:18 time: 1.0372 data_time: 0.0084 memory: 16131 loss: 0.1592 tflops: 7.9935 tokens_per_sec: 132.0925 +2025/09/25 20:52:26 - mmengine - INFO - Iter(train) [1070/7452] base_lr: 1.9974e-06 lr: 1.9974e-06 eta: 2:11:57 time: 1.3420 data_time: 0.0089 memory: 16131 loss: 0.1211 tflops: 5.0488 
tokens_per_sec: 83.4581 +2025/09/25 20:52:36 - mmengine - INFO - Iter(train) [1080/7452] base_lr: 1.9973e-06 lr: 1.9973e-06 eta: 2:11:33 time: 1.0530 data_time: 0.0087 memory: 16131 loss: 1.7188 tflops: 5.9168 tokens_per_sec: 97.8182 +2025/09/25 20:52:48 - mmengine - INFO - Iter(train) [1090/7452] base_lr: 1.9971e-06 lr: 1.9971e-06 eta: 2:11:15 time: 1.0398 data_time: 0.0084 memory: 16130 loss: 0.2305 tflops: 6.7494 tokens_per_sec: 111.5644 +2025/09/25 20:52:59 - mmengine - INFO - Iter(train) [1100/7452] base_lr: 1.9969e-06 lr: 1.9969e-06 eta: 2:10:56 time: 1.0237 data_time: 0.0087 memory: 16130 loss: 0.0752 tflops: 7.2106 tokens_per_sec: 119.1776 +2025/09/25 20:53:10 - mmengine - INFO - Iter(train) [1110/7452] base_lr: 1.9967e-06 lr: 1.9967e-06 eta: 2:10:33 time: 1.0310 data_time: 0.0080 memory: 16131 loss: 0.3789 tflops: 6.0426 tokens_per_sec: 99.8991 +2025/09/25 20:53:20 - mmengine - INFO - Iter(train) [1120/7452] base_lr: 1.9966e-06 lr: 1.9966e-06 eta: 2:10:11 time: 1.0251 data_time: 0.0083 memory: 16131 loss: 2.2656 tflops: 4.7190 tokens_per_sec: 78.0398 +2025/09/25 20:53:31 - mmengine - INFO - Iter(train) [1130/7452] base_lr: 1.9964e-06 lr: 1.9964e-06 eta: 2:09:53 time: 1.0515 data_time: 0.0085 memory: 16130 loss: 2.0312 tflops: 6.5012 tokens_per_sec: 107.4661 +2025/09/25 20:53:42 - mmengine - INFO - Iter(train) [1140/7452] base_lr: 1.9962e-06 lr: 1.9962e-06 eta: 2:09:29 time: 1.0147 data_time: 0.0083 memory: 16132 loss: 0.1216 tflops: 6.8563 tokens_per_sec: 113.3332 +2025/09/25 20:53:53 - mmengine - INFO - Iter(train) [1150/7452] base_lr: 1.9960e-06 lr: 1.9960e-06 eta: 2:09:11 time: 1.4088 data_time: 0.0087 memory: 16130 loss: 0.0708 tflops: 5.1103 tokens_per_sec: 84.4670 +2025/09/25 20:54:04 - mmengine - INFO - Iter(train) [1160/7452] base_lr: 1.9958e-06 lr: 1.9958e-06 eta: 2:08:53 time: 1.0357 data_time: 0.0082 memory: 16131 loss: 1.5078 tflops: 3.9695 tokens_per_sec: 65.6556 +2025/09/25 20:54:15 - mmengine - INFO - Iter(train) [1170/7452] base_lr: 
1.9956e-06 lr: 1.9956e-06 eta: 2:08:31 time: 1.0116 data_time: 0.0085 memory: 16130 loss: 1.4531 tflops: 5.2611 tokens_per_sec: 86.9951 +2025/09/25 20:54:26 - mmengine - INFO - Iter(train) [1180/7452] base_lr: 1.9954e-06 lr: 1.9954e-06 eta: 2:08:13 time: 1.1072 data_time: 0.0093 memory: 16131 loss: 0.0781 tflops: 5.4084 tokens_per_sec: 89.4185 +2025/09/25 20:54:38 - mmengine - INFO - Iter(train) [1190/7452] base_lr: 1.9952e-06 lr: 1.9952e-06 eta: 2:07:59 time: 1.1147 data_time: 0.0086 memory: 16132 loss: 2.5469 tflops: 4.1767 tokens_per_sec: 69.0746 +2025/09/25 20:54:49 - mmengine - INFO - Iter(train) [1200/7452] base_lr: 1.9949e-06 lr: 1.9949e-06 eta: 2:07:41 time: 1.1564 data_time: 0.0086 memory: 16130 loss: 2.0156 tflops: 4.2357 tokens_per_sec: 70.0472 +2025/09/25 20:55:00 - mmengine - INFO - Iter(train) [1210/7452] base_lr: 1.9947e-06 lr: 1.9947e-06 eta: 2:07:24 time: 1.1182 data_time: 0.0087 memory: 16131 loss: 0.1113 tflops: 6.9804 tokens_per_sec: 115.3622 +2025/09/25 20:55:12 - mmengine - INFO - Iter(train) [1220/7452] base_lr: 1.9945e-06 lr: 1.9945e-06 eta: 2:07:11 time: 1.1122 data_time: 0.0080 memory: 16130 loss: 0.1172 tflops: 5.9828 tokens_per_sec: 98.9012 +2025/09/25 20:55:24 - mmengine - INFO - Iter(train) [1230/7452] base_lr: 1.9942e-06 lr: 1.9942e-06 eta: 2:06:55 time: 1.0257 data_time: 0.0085 memory: 16131 loss: 0.0393 tflops: 9.4422 tokens_per_sec: 155.9841 +2025/09/25 20:55:35 - mmengine - INFO - Iter(train) [1240/7452] base_lr: 1.9940e-06 lr: 1.9940e-06 eta: 2:06:38 time: 1.1214 data_time: 0.0082 memory: 16130 loss: 2.0781 tflops: 3.6661 tokens_per_sec: 60.6367 +2025/09/25 20:55:47 - mmengine - INFO - Iter(train) [1250/7452] base_lr: 1.9938e-06 lr: 1.9938e-06 eta: 2:06:23 time: 1.0246 data_time: 0.0084 memory: 16132 loss: 2.2812 tflops: 6.3762 tokens_per_sec: 105.4064 +2025/09/25 20:55:58 - mmengine - INFO - Iter(train) [1260/7452] base_lr: 1.9935e-06 lr: 1.9935e-06 eta: 2:06:04 time: 1.1049 data_time: 0.0081 memory: 16130 loss: 2.0156 tflops: 
4.7618 tokens_per_sec: 78.7409 +2025/09/25 20:56:09 - mmengine - INFO - Iter(train) [1270/7452] base_lr: 1.9932e-06 lr: 1.9932e-06 eta: 2:05:47 time: 1.0217 data_time: 0.0082 memory: 16131 loss: 0.2100 tflops: 6.4537 tokens_per_sec: 106.6868 +2025/09/25 20:56:21 - mmengine - INFO - Iter(train) [1280/7452] base_lr: 1.9930e-06 lr: 1.9930e-06 eta: 2:05:32 time: 1.3689 data_time: 0.0082 memory: 16130 loss: 0.1719 tflops: 5.1265 tokens_per_sec: 84.7386 +2025/09/25 20:56:31 - mmengine - INFO - Iter(train) [1290/7452] base_lr: 1.9927e-06 lr: 1.9927e-06 eta: 2:05:12 time: 1.2645 data_time: 0.0086 memory: 16131 loss: 1.2656 tflops: 2.7250 tokens_per_sec: 45.0784 +2025/09/25 20:56:43 - mmengine - INFO - Iter(train) [1300/7452] base_lr: 1.9925e-06 lr: 1.9925e-06 eta: 2:04:56 time: 1.0304 data_time: 0.0079 memory: 16131 loss: 2.4531 tflops: 7.6344 tokens_per_sec: 126.1695 +2025/09/25 20:56:54 - mmengine - INFO - Iter(train) [1310/7452] base_lr: 1.9922e-06 lr: 1.9922e-06 eta: 2:04:39 time: 1.0323 data_time: 0.0092 memory: 16130 loss: 2.5312 tflops: 4.7447 tokens_per_sec: 78.4633 +2025/09/25 20:57:05 - mmengine - INFO - Iter(train) [1320/7452] base_lr: 1.9919e-06 lr: 1.9919e-06 eta: 2:04:21 time: 1.2138 data_time: 0.0084 memory: 16131 loss: 1.6797 tflops: 3.6863 tokens_per_sec: 60.9672 +2025/09/25 20:57:15 - mmengine - INFO - Iter(train) [1330/7452] base_lr: 1.9916e-06 lr: 1.9916e-06 eta: 2:04:02 time: 1.3014 data_time: 0.0084 memory: 16131 loss: 0.1040 tflops: 5.0664 tokens_per_sec: 83.7533 +2025/09/25 20:57:26 - mmengine - INFO - Iter(train) [1340/7452] base_lr: 1.9913e-06 lr: 1.9913e-06 eta: 2:03:44 time: 1.0359 data_time: 0.0084 memory: 16131 loss: 2.0156 tflops: 5.5466 tokens_per_sec: 91.7079 +2025/09/25 20:57:37 - mmengine - INFO - Iter(train) [1350/7452] base_lr: 1.9910e-06 lr: 1.9910e-06 eta: 2:03:27 time: 1.0284 data_time: 0.0086 memory: 16130 loss: 0.0884 tflops: 8.0617 tokens_per_sec: 133.2190 +2025/09/25 20:57:49 - mmengine - INFO - Iter(train) [1360/7452] base_lr: 
1.9907e-06 lr: 1.9907e-06 eta: 2:03:12 time: 1.0295 data_time: 0.0080 memory: 16130 loss: 2.2031 tflops: 4.9339 tokens_per_sec: 81.5896 +2025/09/25 20:57:59 - mmengine - INFO - Iter(train) [1370/7452] base_lr: 1.9904e-06 lr: 1.9904e-06 eta: 2:02:53 time: 1.0724 data_time: 0.0094 memory: 16130 loss: 0.1660 tflops: 6.1487 tokens_per_sec: 101.6442 +2025/09/25 20:58:10 - mmengine - INFO - Iter(train) [1380/7452] base_lr: 1.9901e-06 lr: 1.9901e-06 eta: 2:02:34 time: 1.0170 data_time: 0.0079 memory: 16130 loss: 1.4766 tflops: 3.9832 tokens_per_sec: 65.8831 +2025/09/25 20:58:22 - mmengine - INFO - Iter(train) [1390/7452] base_lr: 1.9898e-06 lr: 1.9898e-06 eta: 2:02:20 time: 1.3785 data_time: 0.0078 memory: 16131 loss: 0.0596 tflops: 5.0909 tokens_per_sec: 84.1504 +2025/09/25 20:58:32 - mmengine - INFO - Iter(train) [1400/7452] base_lr: 1.9895e-06 lr: 1.9895e-06 eta: 2:02:02 time: 1.0527 data_time: 0.0084 memory: 16133 loss: 0.0535 tflops: 5.8032 tokens_per_sec: 95.9425 +2025/09/25 20:58:43 - mmengine - INFO - Iter(train) [1410/7452] base_lr: 1.9892e-06 lr: 1.9892e-06 eta: 2:01:45 time: 1.0429 data_time: 0.0085 memory: 16130 loss: 1.6719 tflops: 4.7544 tokens_per_sec: 78.6233 +2025/09/25 20:58:55 - mmengine - INFO - Iter(train) [1420/7452] base_lr: 1.9888e-06 lr: 1.9888e-06 eta: 2:01:30 time: 1.3761 data_time: 0.0084 memory: 16131 loss: 0.1108 tflops: 4.8355 tokens_per_sec: 79.9340 +2025/09/25 20:59:05 - mmengine - INFO - Iter(train) [1430/7452] base_lr: 1.9885e-06 lr: 1.9885e-06 eta: 2:01:12 time: 1.0603 data_time: 0.0085 memory: 16130 loss: 1.9766 tflops: 4.6194 tokens_per_sec: 76.3920 +2025/09/25 20:59:17 - mmengine - INFO - Iter(train) [1440/7452] base_lr: 1.9882e-06 lr: 1.9882e-06 eta: 2:00:56 time: 1.0642 data_time: 0.0081 memory: 16131 loss: 0.0664 tflops: 6.7654 tokens_per_sec: 111.8245 +2025/09/25 20:59:28 - mmengine - INFO - Iter(train) [1450/7452] base_lr: 1.9878e-06 lr: 1.9878e-06 eta: 2:00:40 time: 1.0263 data_time: 0.0082 memory: 16131 loss: 2.1250 tflops: 
4.3597 tokens_per_sec: 72.1038 +2025/09/25 20:59:38 - mmengine - INFO - Iter(train) [1460/7452] base_lr: 1.9875e-06 lr: 1.9875e-06 eta: 2:00:22 time: 1.0145 data_time: 0.0080 memory: 16131 loss: 0.2559 tflops: 6.0216 tokens_per_sec: 99.5546 +2025/09/25 20:59:49 - mmengine - INFO - Iter(train) [1470/7452] base_lr: 1.9871e-06 lr: 1.9871e-06 eta: 2:00:05 time: 1.0211 data_time: 0.0085 memory: 16131 loss: 1.9922 tflops: 3.7892 tokens_per_sec: 62.6771 +2025/09/25 21:00:00 - mmengine - INFO - Iter(train) [1480/7452] base_lr: 1.9868e-06 lr: 1.9868e-06 eta: 1:59:49 time: 1.0429 data_time: 0.0089 memory: 16131 loss: 1.0156 tflops: 3.8842 tokens_per_sec: 64.2455 +2025/09/25 21:00:11 - mmengine - INFO - Iter(train) [1490/7452] base_lr: 1.9864e-06 lr: 1.9864e-06 eta: 1:59:33 time: 1.0185 data_time: 0.0081 memory: 16131 loss: 1.5703 tflops: 4.6902 tokens_per_sec: 77.5645 +2025/09/25 21:00:22 - mmengine - INFO - Iter(train) [1500/7452] base_lr: 1.9860e-06 lr: 1.9860e-06 eta: 1:59:17 time: 1.0337 data_time: 0.0082 memory: 16130 loss: 2.3594 tflops: 6.3199 tokens_per_sec: 104.4761 +2025/09/25 21:00:33 - mmengine - INFO - Iter(train) [1510/7452] base_lr: 1.9857e-06 lr: 1.9857e-06 eta: 1:59:01 time: 1.0235 data_time: 0.0084 memory: 16131 loss: 0.1670 tflops: 7.2708 tokens_per_sec: 120.1722 +2025/09/25 21:00:44 - mmengine - INFO - Iter(train) [1520/7452] base_lr: 1.9853e-06 lr: 1.9853e-06 eta: 1:58:44 time: 1.0554 data_time: 0.0081 memory: 16131 loss: 1.7891 tflops: 6.5346 tokens_per_sec: 108.0171 +2025/09/25 21:00:56 - mmengine - INFO - Iter(train) [1530/7452] base_lr: 1.9849e-06 lr: 1.9849e-06 eta: 1:58:30 time: 1.4113 data_time: 0.0084 memory: 16130 loss: 1.7266 tflops: 2.7844 tokens_per_sec: 46.0553 +2025/09/25 21:01:03 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 21:01:03 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:01:04 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern within the slide.<|im_end|> + +2025/09/25 21:01:04 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:01:54 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report for the whole slide image of the breast biopsy specimen is as follows: + +1. **Diagnosis:** The whole slide image of the breast biopsy specimen demonstrates a well-circumscribed, well-differentiated, infiltrating ductal carcinoma of the breast, with a tumor size of approximately 1.5 cm in diameter. + +2. **Histological Features:** + - The tumor cells are arranged in a cribriform pattern, with areas of solid and cribriform growth. + - The tumor cells show moderate nuclear atypia and are positive for estrogen receptor (ER) and progesterone receptor (PR), indicating hormone receptor positivity. + - The tumor cells are negative for HER2 by immunohistochemistry (IHC) and FISH, indicating no amplification or overexpression of HER2. + - The tumor cells are negative for p53 by immunohistochemistry, indicating no p53 mutation. + - The tumor cells are negative for p63 by immunohistochemistry, indicating no basal cell component. + - The tumor cells are negative for p16 by immunohistochemistry, indicating no p16 mutation. 
+ - The tumor cells are negative for p53 by immunohistochemistry, indicating no p53 mutation. + - The tumor cells are negative for p63 by immunohistochemistry, indicating no basal cell component. + - The tumor cells are negative for p16 by immunohistochemistry, indicating no p16 mutation. + - The tumor cells are negative for p53 by immunohistochemistry, indicating no p53 mutation. + +3. **Infiltration:** The tumor cells are seen infiltrating the surrounding stroma and are surrounded by a fibrous stroma. + +4. **Mitotic Activity:** The mitotic activity is low, with a count of less than 3 mitoses per 10 high power fields (HPF). + +5. **Lymphovascular Invasion:** No evidence of lymphovascular invasion is seen. + +6. **Infiltration of the Ductal Epithelium:** The tumor cells are seen infiltrating the ductal epithelium. + +7. **Infiltration of the Stroma:** The tumor cells are seen infiltrating the surrounding stroma. + +8. **Infiltration of the Nipple:** No evidence of tumor cells in the nipple. + +9. **Infiltration of the Skin:** No evidence of tumor cells in the overlying skin. + +10. **Infiltration of the Nipple:** No evidence of tumor cells in the nipple. + +11. **Infiltration of the Nipple:** No evidence of tumor cells in the nipple. + +12. **Infiltration of the Nipple:** No evidence of tumor cells in the nipple. + +13. **Infiltration of the Nipple:** No evidence of tumor cells in the nipple. + +14. **Infiltration + +2025/09/25 21:01:54 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:02:04 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +Based on the provided features, the correct histological classification of the tumor appears to be B) Moderately differentiated squamous cell carcinoma. The presence of keratin pearls and intercellular bridges, along with the presence of keratinizing squamous cells, are characteristic of squamous cell carcinoma. The presence of keratin pearls and intercellular bridges indicates a degree of differentiation, which is consistent with a moderately differentiated carcinoma rather than a poorly or well-differentiated one. Additionally, the presence of keratinization and the overall architecture of the tumor are more consistent with squamous cell carcinoma rather than adenocarcinoma.<|im_end|> + +2025/09/25 21:02:22 - mmengine - INFO - Iter(train) [1540/7452] base_lr: 1.9845e-06 lr: 1.9845e-06 eta: 2:03:02 time: 1.0409 data_time: 0.0082 memory: 16131 loss: 0.0825 tflops: 6.2184 tokens_per_sec: 102.7985 +2025/09/25 21:02:32 - mmengine - INFO - Iter(train) [1550/7452] base_lr: 1.9841e-06 lr: 1.9841e-06 eta: 2:02:43 time: 1.0414 data_time: 0.0080 memory: 16130 loss: 1.9062 tflops: 4.4710 tokens_per_sec: 73.9408 +2025/09/25 21:02:43 - mmengine - INFO - Iter(train) [1560/7452] base_lr: 1.9837e-06 lr: 1.9837e-06 eta: 2:02:26 time: 1.3993 data_time: 0.0080 memory: 16130 loss: 0.0120 tflops: 5.8816 tokens_per_sec: 97.1944 +2025/09/25 21:02:55 - mmengine - INFO - Iter(train) [1570/7452] base_lr: 1.9833e-06 lr: 1.9833e-06 eta: 2:02:08 time: 1.2095 data_time: 0.0084 memory: 16130 loss: 2.1875 tflops: 5.3515 tokens_per_sec: 88.4674 +2025/09/25 21:03:05 - mmengine - INFO - Iter(train) [1580/7452] base_lr: 1.9829e-06 lr: 1.9829e-06 eta: 2:01:50 time: 1.0442 data_time: 0.0085 memory: 16131 loss: 2.7344 tflops: 7.4169 tokens_per_sec: 122.5781 +2025/09/25 21:03:16 - mmengine - 
INFO - Iter(train) [1590/7452] base_lr: 1.9825e-06 lr: 1.9825e-06 eta: 2:01:31 time: 1.0526 data_time: 0.0087 memory: 16131 loss: 0.0488 tflops: 6.1489 tokens_per_sec: 101.6508 +2025/09/25 21:03:28 - mmengine - INFO - Iter(train) [1600/7452] base_lr: 1.9821e-06 lr: 1.9821e-06 eta: 2:01:16 time: 1.3018 data_time: 0.0079 memory: 16131 loss: 0.0864 tflops: 5.8565 tokens_per_sec: 96.7919 +2025/09/25 21:03:39 - mmengine - INFO - Iter(train) [1610/7452] base_lr: 1.9817e-06 lr: 1.9817e-06 eta: 2:00:57 time: 1.0435 data_time: 0.0082 memory: 16131 loss: 2.0625 tflops: 7.8284 tokens_per_sec: 129.3664 +2025/09/25 21:03:49 - mmengine - INFO - Iter(train) [1620/7452] base_lr: 1.9813e-06 lr: 1.9813e-06 eta: 2:00:38 time: 1.0106 data_time: 0.0077 memory: 16130 loss: 0.1572 tflops: 6.0451 tokens_per_sec: 99.9430 +2025/09/25 21:04:00 - mmengine - INFO - Iter(train) [1630/7452] base_lr: 1.9808e-06 lr: 1.9808e-06 eta: 2:00:20 time: 1.0496 data_time: 0.0081 memory: 16131 loss: 0.0757 tflops: 6.6281 tokens_per_sec: 109.5608 +2025/09/25 21:04:11 - mmengine - INFO - Iter(train) [1640/7452] base_lr: 1.9804e-06 lr: 1.9804e-06 eta: 2:00:02 time: 1.0035 data_time: 0.0087 memory: 16130 loss: 0.0684 tflops: 6.2083 tokens_per_sec: 102.6383 +2025/09/25 21:04:22 - mmengine - INFO - Iter(train) [1650/7452] base_lr: 1.9800e-06 lr: 1.9800e-06 eta: 1:59:44 time: 1.2909 data_time: 0.0082 memory: 16130 loss: 0.0498 tflops: 5.5303 tokens_per_sec: 91.4111 +2025/09/25 21:04:33 - mmengine - INFO - Iter(train) [1660/7452] base_lr: 1.9795e-06 lr: 1.9795e-06 eta: 1:59:28 time: 1.3493 data_time: 0.0084 memory: 16131 loss: 1.9141 tflops: 4.7970 tokens_per_sec: 79.3022 +2025/09/25 21:04:44 - mmengine - INFO - Iter(train) [1670/7452] base_lr: 1.9791e-06 lr: 1.9791e-06 eta: 1:59:11 time: 1.0333 data_time: 0.0085 memory: 16131 loss: 1.8047 tflops: 4.0374 tokens_per_sec: 66.7779 +2025/09/25 21:04:55 - mmengine - INFO - Iter(train) [1680/7452] base_lr: 1.9786e-06 lr: 1.9786e-06 eta: 1:58:53 time: 1.0140 data_time: 
0.0080 memory: 16130 loss: 2.8281 tflops: 5.9650 tokens_per_sec: 98.6192 +2025/09/25 21:05:06 - mmengine - INFO - Iter(train) [1690/7452] base_lr: 1.9781e-06 lr: 1.9781e-06 eta: 1:58:36 time: 1.2198 data_time: 0.0084 memory: 16131 loss: 0.0410 tflops: 5.6043 tokens_per_sec: 92.6407 +2025/09/25 21:05:17 - mmengine - INFO - Iter(train) [1700/7452] base_lr: 1.9777e-06 lr: 1.9777e-06 eta: 1:58:18 time: 1.0116 data_time: 0.0083 memory: 16131 loss: 0.0613 tflops: 7.4168 tokens_per_sec: 122.5837 +2025/09/25 21:05:28 - mmengine - INFO - Iter(train) [1710/7452] base_lr: 1.9772e-06 lr: 1.9772e-06 eta: 1:58:02 time: 1.0349 data_time: 0.0084 memory: 16130 loss: 0.1455 tflops: 6.5467 tokens_per_sec: 108.2200 +2025/09/25 21:05:39 - mmengine - INFO - Iter(train) [1720/7452] base_lr: 1.9767e-06 lr: 1.9767e-06 eta: 1:57:45 time: 1.0060 data_time: 0.0089 memory: 16131 loss: 0.0076 tflops: 8.1203 tokens_per_sec: 134.1913 +2025/09/25 21:05:49 - mmengine - INFO - Iter(train) [1730/7452] base_lr: 1.9763e-06 lr: 1.9763e-06 eta: 1:57:27 time: 0.9961 data_time: 0.0084 memory: 16131 loss: 0.2197 tflops: 7.9577 tokens_per_sec: 131.5113 +2025/09/25 21:06:01 - mmengine - INFO - Iter(train) [1740/7452] base_lr: 1.9758e-06 lr: 1.9758e-06 eta: 1:57:11 time: 1.3361 data_time: 0.0094 memory: 16131 loss: 1.8672 tflops: 3.6659 tokens_per_sec: 60.6236 +2025/09/25 21:06:11 - mmengine - INFO - Iter(train) [1750/7452] base_lr: 1.9753e-06 lr: 1.9753e-06 eta: 1:56:54 time: 1.0479 data_time: 0.0086 memory: 16131 loss: 1.9297 tflops: 4.2697 tokens_per_sec: 70.6146 +2025/09/25 21:06:22 - mmengine - INFO - Iter(train) [1760/7452] base_lr: 1.9748e-06 lr: 1.9748e-06 eta: 1:56:38 time: 1.1058 data_time: 0.0085 memory: 16131 loss: 0.0386 tflops: 4.9771 tokens_per_sec: 82.2966 +2025/09/25 21:06:33 - mmengine - INFO - Iter(train) [1770/7452] base_lr: 1.9743e-06 lr: 1.9743e-06 eta: 1:56:21 time: 1.3154 data_time: 0.0085 memory: 16131 loss: 0.0109 tflops: 5.5194 tokens_per_sec: 91.2278 +2025/09/25 21:06:44 - mmengine 
- INFO - Iter(train) [1780/7452] base_lr: 1.9738e-06 lr: 1.9738e-06 eta: 1:56:04 time: 1.0543 data_time: 0.0081 memory: 16131 loss: 2.3125 tflops: 3.8994 tokens_per_sec: 64.4958 +2025/09/25 21:06:55 - mmengine - INFO - Iter(train) [1790/7452] base_lr: 1.9733e-06 lr: 1.9733e-06 eta: 1:55:47 time: 1.0317 data_time: 0.0086 memory: 16131 loss: 0.0212 tflops: 8.5650 tokens_per_sec: 141.5187 +2025/09/25 21:07:07 - mmengine - INFO - Iter(train) [1800/7452] base_lr: 1.9728e-06 lr: 1.9728e-06 eta: 1:55:33 time: 1.0435 data_time: 0.0081 memory: 16130 loss: 0.0046 tflops: 6.8992 tokens_per_sec: 114.0358 +2025/09/25 21:07:18 - mmengine - INFO - Iter(train) [1810/7452] base_lr: 1.9723e-06 lr: 1.9723e-06 eta: 1:55:16 time: 1.4118 data_time: 0.0085 memory: 16131 loss: 0.0601 tflops: 4.4988 tokens_per_sec: 74.3734 +2025/09/25 21:07:28 - mmengine - INFO - Iter(train) [1820/7452] base_lr: 1.9717e-06 lr: 1.9717e-06 eta: 1:55:00 time: 1.2722 data_time: 0.0086 memory: 16131 loss: 0.0386 tflops: 5.3732 tokens_per_sec: 88.8193 +2025/09/25 21:07:39 - mmengine - INFO - Iter(train) [1830/7452] base_lr: 1.9712e-06 lr: 1.9712e-06 eta: 1:54:44 time: 1.0350 data_time: 0.0086 memory: 16130 loss: 0.0044 tflops: 7.2488 tokens_per_sec: 119.8069 +2025/09/25 21:07:50 - mmengine - INFO - Iter(train) [1840/7452] base_lr: 1.9707e-06 lr: 1.9707e-06 eta: 1:54:28 time: 1.2418 data_time: 0.0082 memory: 16130 loss: 0.0422 tflops: 6.0418 tokens_per_sec: 99.8572 +2025/09/25 21:08:01 - mmengine - INFO - Iter(train) [1850/7452] base_lr: 1.9701e-06 lr: 1.9701e-06 eta: 1:54:12 time: 1.1175 data_time: 0.0086 memory: 16131 loss: 2.0781 tflops: 4.1665 tokens_per_sec: 68.9065 +2025/09/25 21:08:13 - mmengine - INFO - Iter(train) [1860/7452] base_lr: 1.9696e-06 lr: 1.9696e-06 eta: 1:53:56 time: 1.0498 data_time: 0.0082 memory: 16131 loss: 0.0515 tflops: 6.3388 tokens_per_sec: 104.7853 +2025/09/25 21:08:23 - mmengine - INFO - Iter(train) [1870/7452] base_lr: 1.9691e-06 lr: 1.9691e-06 eta: 1:53:40 time: 1.0117 data_time: 
0.0091 memory: 16131 loss: 0.0593 tflops: 6.1584 tokens_per_sec: 101.8135 +2025/09/25 21:08:34 - mmengine - INFO - Iter(train) [1880/7452] base_lr: 1.9685e-06 lr: 1.9685e-06 eta: 1:53:24 time: 1.0285 data_time: 0.0085 memory: 16131 loss: 0.4082 tflops: 5.8221 tokens_per_sec: 96.2584 +2025/09/25 21:08:45 - mmengine - INFO - Iter(train) [1890/7452] base_lr: 1.9680e-06 lr: 1.9680e-06 eta: 1:53:08 time: 1.0509 data_time: 0.0086 memory: 16131 loss: 1.8203 tflops: 4.4882 tokens_per_sec: 74.2245 +2025/09/25 21:08:57 - mmengine - INFO - Iter(train) [1900/7452] base_lr: 1.9674e-06 lr: 1.9674e-06 eta: 1:52:53 time: 1.3474 data_time: 0.0083 memory: 16131 loss: 2.1406 tflops: 4.1743 tokens_per_sec: 69.0198 +2025/09/25 21:09:07 - mmengine - INFO - Iter(train) [1910/7452] base_lr: 1.9668e-06 lr: 1.9668e-06 eta: 1:52:37 time: 1.3214 data_time: 0.0085 memory: 16131 loss: 0.1250 tflops: 4.5315 tokens_per_sec: 74.9208 +2025/09/25 21:09:19 - mmengine - INFO - Iter(train) [1920/7452] base_lr: 1.9663e-06 lr: 1.9663e-06 eta: 1:52:22 time: 1.3418 data_time: 0.0087 memory: 16130 loss: 0.0058 tflops: 4.8239 tokens_per_sec: 79.7461 +2025/09/25 21:09:30 - mmengine - INFO - Iter(train) [1930/7452] base_lr: 1.9657e-06 lr: 1.9657e-06 eta: 1:52:07 time: 1.0200 data_time: 0.0082 memory: 16131 loss: 0.0640 tflops: 5.5735 tokens_per_sec: 92.1548 +2025/09/25 21:09:41 - mmengine - INFO - Iter(train) [1940/7452] base_lr: 1.9651e-06 lr: 1.9651e-06 eta: 1:51:51 time: 1.0295 data_time: 0.0087 memory: 16131 loss: 2.5469 tflops: 4.3462 tokens_per_sec: 71.8800 +2025/09/25 21:09:52 - mmengine - INFO - Iter(train) [1950/7452] base_lr: 1.9645e-06 lr: 1.9645e-06 eta: 1:51:36 time: 1.3013 data_time: 0.0089 memory: 16131 loss: 0.0294 tflops: 6.5108 tokens_per_sec: 107.5861 +2025/09/25 21:10:03 - mmengine - INFO - Iter(train) [1960/7452] base_lr: 1.9639e-06 lr: 1.9639e-06 eta: 1:51:20 time: 1.0202 data_time: 0.0082 memory: 16131 loss: 0.0038 tflops: 7.6512 tokens_per_sec: 126.4483 +2025/09/25 21:10:14 - mmengine - 
INFO - Iter(train) [1970/7452] base_lr: 1.9633e-06 lr: 1.9633e-06 eta: 1:51:04 time: 1.0294 data_time: 0.0083 memory: 16131 loss: 0.0425 tflops: 7.6413 tokens_per_sec: 126.2835 +2025/09/25 21:10:24 - mmengine - INFO - Iter(train) [1980/7452] base_lr: 1.9628e-06 lr: 1.9628e-06 eta: 1:50:49 time: 1.0154 data_time: 0.0084 memory: 16131 loss: 1.7734 tflops: 5.2412 tokens_per_sec: 86.6660 +2025/09/25 21:10:35 - mmengine - INFO - Iter(train) [1990/7452] base_lr: 1.9621e-06 lr: 1.9621e-06 eta: 1:50:33 time: 1.0273 data_time: 0.0081 memory: 16131 loss: 0.0201 tflops: 7.2444 tokens_per_sec: 119.7348 +2025/09/25 21:10:46 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 21:10:46 - mmengine - INFO - Iter(train) [2000/7452] base_lr: 1.9615e-06 lr: 1.9615e-06 eta: 1:50:18 time: 0.9910 data_time: 0.0087 memory: 16131 loss: 0.0168 tflops: 6.7144 tokens_per_sec: 110.9947 +2025/09/25 21:10:57 - mmengine - INFO - Iter(train) [2010/7452] base_lr: 1.9609e-06 lr: 1.9609e-06 eta: 1:50:02 time: 1.0306 data_time: 0.0084 memory: 16131 loss: 0.0491 tflops: 6.8684 tokens_per_sec: 113.5300 +2025/09/25 21:11:08 - mmengine - INFO - Iter(train) [2020/7452] base_lr: 1.9603e-06 lr: 1.9603e-06 eta: 1:49:46 time: 1.0740 data_time: 0.0086 memory: 16131 loss: 2.3438 tflops: 6.8166 tokens_per_sec: 112.6672 +2025/09/25 21:11:19 - mmengine - INFO - Iter(train) [2030/7452] base_lr: 1.9597e-06 lr: 1.9597e-06 eta: 1:49:32 time: 1.4093 data_time: 0.0089 memory: 16131 loss: 1.7188 tflops: 3.9480 tokens_per_sec: 65.2793 +2025/09/25 21:11:30 - mmengine - INFO - Iter(train) [2040/7452] base_lr: 1.9591e-06 lr: 1.9591e-06 eta: 1:49:17 time: 1.2418 data_time: 0.0085 memory: 16131 loss: 0.0157 tflops: 5.2610 tokens_per_sec: 86.9710 +2025/09/25 21:11:39 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 21:11:39 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:11:41 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern within the slide.<|im_end|> + +2025/09/25 21:11:41 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:12:31 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report for the whole slide image of the breast tissue biopsy is as follows: + +1. **Diagnosis:** The whole slide image of the breast tissue biopsy demonstrates invasive ductal carcinoma of the breast, specifically classified as **ductal carcinoma in situ (DCIS)**. The tumor cells are predominantly composed of small, uniform, and pleomorphic cells with minimal atypia, consistent with a grade 1 DCIS. + +2. **Extent of Involvement:** The tumor involves approximately 10% of the total area of the core biopsy specimen. The tumor is predominantly located in the upper outer quadrant of the breast. + +3. **Histological Features:** + - **Cellularity:** The tumor cells are moderately cellular. + - **Mitotic Activity:** There are no mitotic figures observed in the field. + - **Pleomorphism:** The cells show minimal pleomorphism. + - **Nuclear Atypia:** The nuclear atypia is minimal. + - **Proliferation Index:** The Ki-67 proliferation index is approximately 1-2%. + - **Invasion:** The tumor cells show no evidence of invasion into the surrounding stroma or blood vessels. + +4. 
**Microscopic Findings:** + - **Infiltration:** The tumor cells infiltrate the ductal structures in a patchy and scattered pattern. + - **Infiltration Type:** The infiltration is predominantly intraductal with no evidence of extraductal invasion. + - **Infiltration Pattern:** The tumor cells infiltrate the ductal structures in a patchy and scattered pattern. + +5. **Additional Findings:** + - **Inflammatory Changes:** No evidence of inflammatory changes or lymphocytic infiltration. + - **Fibrosis:** No significant fibrosis is observed. + - **Calcifications:** No calcifications are present in the field. + - **Other Tumors:** No other tumors or atypical hyperplasia are observed. + +6. **Immunohistochemical Findings:** + - **ER (Estrogen Receptor):** Positive for estrogen receptor (ER) staining. + - **PR (Progesterone Receptor):** Positive for progesterone receptor (PR) staining. + - **HER2 (Human Epidermal Growth Factor Receptor 2):** Negative for HER2 staining. + - **Ki-67:** As mentioned earlier, the Ki-67 proliferation index is approximately 1-2%. + +7. **Clinical Implications:** + - **Risk of Progression:** The low-grade DCIS with minimal atypia and no evidence of invasion suggests a low risk of progression to invasive cancer. + - **Management:** The patient should be discussed with a multidisciplinary team to determine the appropriate management, which may include close monitoring, surgical excision, or other interventions depending on the patient's clinical and personal preferences. + +8. **Conclusion: + +2025/09/25 21:12:31 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:12:32 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 21:12:34 - mmengine - INFO - Iter(train) [2050/7452] base_lr: 1.9584e-06 lr: 1.9584e-06 eta: 1:51:22 time: 1.0498 data_time: 0.0086 memory: 16130 loss: 0.1465 tflops: 6.1080 tokens_per_sec: 100.9752 +2025/09/25 21:12:45 - mmengine - INFO - Iter(train) [2060/7452] base_lr: 1.9578e-06 lr: 1.9578e-06 eta: 1:51:06 time: 1.0396 data_time: 0.0086 memory: 16131 loss: 0.1138 tflops: 7.0416 tokens_per_sec: 116.3870 +2025/09/25 21:12:56 - mmengine - INFO - Iter(train) [2070/7452] base_lr: 1.9572e-06 lr: 1.9572e-06 eta: 1:50:50 time: 1.0348 data_time: 0.0085 memory: 16131 loss: 0.0040 tflops: 8.6559 tokens_per_sec: 143.0170 +2025/09/25 21:13:07 - mmengine - INFO - Iter(train) [2080/7452] base_lr: 1.9565e-06 lr: 1.9565e-06 eta: 1:50:33 time: 1.0240 data_time: 0.0080 memory: 16131 loss: 2.7969 tflops: 5.8476 tokens_per_sec: 96.6790 +2025/09/25 21:13:18 - mmengine - INFO - Iter(train) [2090/7452] base_lr: 1.9559e-06 lr: 1.9559e-06 eta: 1:50:17 time: 1.0192 data_time: 0.0087 memory: 16130 loss: 0.0009 tflops: 6.0532 tokens_per_sec: 100.0742 +2025/09/25 21:13:29 - mmengine - INFO - Iter(train) [2100/7452] base_lr: 1.9552e-06 lr: 1.9552e-06 eta: 1:50:02 time: 1.0047 data_time: 0.0083 memory: 16131 loss: 2.4688 tflops: 6.1407 tokens_per_sec: 101.5210 +2025/09/25 21:13:40 - mmengine - INFO - Iter(train) [2110/7452] base_lr: 1.9546e-06 lr: 1.9546e-06 eta: 1:49:45 time: 1.0049 data_time: 0.0084 memory: 16131 loss: 2.3750 tflops: 3.6095 tokens_per_sec: 59.7078 +2025/09/25 21:13:50 - mmengine - INFO - Iter(train) [2120/7452] base_lr: 1.9539e-06 lr: 1.9539e-06 eta: 1:49:29 time: 1.0022 data_time: 0.0084 memory: 16131 loss: 2.0156 tflops: 7.9097 tokens_per_sec: 130.7173 +2025/09/25 21:14:02 - 
mmengine - INFO - Iter(train) [2130/7452] base_lr: 1.9532e-06 lr: 1.9532e-06 eta: 1:49:14 time: 1.0553 data_time: 0.0086 memory: 16130 loss: 2.8750 tflops: 5.8466 tokens_per_sec: 96.6594 +2025/09/25 21:14:12 - mmengine - INFO - Iter(train) [2140/7452] base_lr: 1.9525e-06 lr: 1.9525e-06 eta: 1:48:57 time: 0.9988 data_time: 0.0083 memory: 16131 loss: 2.0625 tflops: 4.7220 tokens_per_sec: 78.0920 +2025/09/25 21:14:23 - mmengine - INFO - Iter(train) [2150/7452] base_lr: 1.9519e-06 lr: 1.9519e-06 eta: 1:48:41 time: 1.2018 data_time: 0.0086 memory: 16131 loss: 0.0068 tflops: 5.5368 tokens_per_sec: 91.5273 +2025/09/25 21:14:34 - mmengine - INFO - Iter(train) [2160/7452] base_lr: 1.9512e-06 lr: 1.9512e-06 eta: 1:48:25 time: 0.9995 data_time: 0.0081 memory: 16131 loss: 0.2090 tflops: 7.6275 tokens_per_sec: 126.0619 +2025/09/25 21:14:44 - mmengine - INFO - Iter(train) [2170/7452] base_lr: 1.9505e-06 lr: 1.9505e-06 eta: 1:48:09 time: 1.2333 data_time: 0.0088 memory: 16130 loss: 1.5078 tflops: 4.3151 tokens_per_sec: 71.3526 +2025/09/25 21:14:56 - mmengine - INFO - Iter(train) [2180/7452] base_lr: 1.9498e-06 lr: 1.9498e-06 eta: 1:47:54 time: 1.0094 data_time: 0.0084 memory: 16130 loss: 0.0129 tflops: 7.4325 tokens_per_sec: 122.8424 +2025/09/25 21:15:07 - mmengine - INFO - Iter(train) [2190/7452] base_lr: 1.9491e-06 lr: 1.9491e-06 eta: 1:47:38 time: 1.2353 data_time: 0.0080 memory: 16131 loss: 2.4531 tflops: 5.8771 tokens_per_sec: 97.1404 +2025/09/25 21:15:17 - mmengine - INFO - Iter(train) [2200/7452] base_lr: 1.9484e-06 lr: 1.9484e-06 eta: 1:47:22 time: 1.0009 data_time: 0.0084 memory: 16130 loss: 0.0444 tflops: 6.0432 tokens_per_sec: 99.9115 +2025/09/25 21:15:28 - mmengine - INFO - Iter(train) [2210/7452] base_lr: 1.9477e-06 lr: 1.9477e-06 eta: 1:47:06 time: 1.0406 data_time: 0.0082 memory: 16131 loss: 2.0938 tflops: 4.4742 tokens_per_sec: 73.9950 +2025/09/25 21:15:39 - mmengine - INFO - Iter(train) [2220/7452] base_lr: 1.9470e-06 lr: 1.9470e-06 eta: 1:46:52 time: 1.3004 
data_time: 0.0086 memory: 16130 loss: 2.0938 tflops: 5.2567 tokens_per_sec: 86.8941 +2025/09/25 21:15:50 - mmengine - INFO - Iter(train) [2230/7452] base_lr: 1.9463e-06 lr: 1.9463e-06 eta: 1:46:36 time: 1.0415 data_time: 0.0081 memory: 16131 loss: 0.0082 tflops: 8.1351 tokens_per_sec: 134.4269 +2025/09/25 21:16:02 - mmengine - INFO - Iter(train) [2240/7452] base_lr: 1.9456e-06 lr: 1.9456e-06 eta: 1:46:23 time: 1.3635 data_time: 0.0086 memory: 16131 loss: 0.0090 tflops: 4.6137 tokens_per_sec: 76.2740 +2025/09/25 21:16:13 - mmengine - INFO - Iter(train) [2250/7452] base_lr: 1.9448e-06 lr: 1.9448e-06 eta: 1:46:07 time: 1.0568 data_time: 0.0082 memory: 16130 loss: 0.0287 tflops: 5.6660 tokens_per_sec: 93.6779 +2025/09/25 21:16:24 - mmengine - INFO - Iter(train) [2260/7452] base_lr: 1.9441e-06 lr: 1.9441e-06 eta: 1:45:52 time: 1.0209 data_time: 0.0084 memory: 16131 loss: 1.6797 tflops: 4.7976 tokens_per_sec: 79.3394 +2025/09/25 21:16:35 - mmengine - INFO - Iter(train) [2270/7452] base_lr: 1.9434e-06 lr: 1.9434e-06 eta: 1:45:37 time: 1.0055 data_time: 0.0084 memory: 16131 loss: 0.2236 tflops: 7.2204 tokens_per_sec: 119.3437 +2025/09/25 21:16:45 - mmengine - INFO - Iter(train) [2280/7452] base_lr: 1.9426e-06 lr: 1.9426e-06 eta: 1:45:21 time: 1.0175 data_time: 0.0088 memory: 16131 loss: 2.1094 tflops: 6.2420 tokens_per_sec: 103.1923 +2025/09/25 21:16:56 - mmengine - INFO - Iter(train) [2290/7452] base_lr: 1.9419e-06 lr: 1.9419e-06 eta: 1:45:05 time: 1.0331 data_time: 0.0083 memory: 16131 loss: 0.0005 tflops: 8.3773 tokens_per_sec: 138.4234 +2025/09/25 21:17:07 - mmengine - INFO - Iter(train) [2300/7452] base_lr: 1.9411e-06 lr: 1.9411e-06 eta: 1:44:51 time: 1.0566 data_time: 0.0140 memory: 16131 loss: 0.1436 tflops: 6.6417 tokens_per_sec: 109.7846 +2025/09/25 21:17:18 - mmengine - INFO - Iter(train) [2310/7452] base_lr: 1.9404e-06 lr: 1.9404e-06 eta: 1:44:37 time: 1.0289 data_time: 0.0083 memory: 16131 loss: 0.0283 tflops: 6.9385 tokens_per_sec: 114.6873 +2025/09/25 
21:17:29 - mmengine - INFO - Iter(train) [2320/7452] base_lr: 1.9396e-06 lr: 1.9396e-06 eta: 1:44:21 time: 1.2213 data_time: 0.0086 memory: 16131 loss: 0.0630 tflops: 4.5062 tokens_per_sec: 74.5101 +2025/09/25 21:17:40 - mmengine - INFO - Iter(train) [2330/7452] base_lr: 1.9389e-06 lr: 1.9389e-06 eta: 1:44:06 time: 1.0186 data_time: 0.0084 memory: 16131 loss: 0.0011 tflops: 6.1163 tokens_per_sec: 101.1174 +2025/09/25 21:17:51 - mmengine - INFO - Iter(train) [2340/7452] base_lr: 1.9381e-06 lr: 1.9381e-06 eta: 1:43:50 time: 1.0018 data_time: 0.0082 memory: 16131 loss: 0.0157 tflops: 7.5496 tokens_per_sec: 124.7759 +2025/09/25 21:18:01 - mmengine - INFO - Iter(train) [2350/7452] base_lr: 1.9373e-06 lr: 1.9373e-06 eta: 1:43:35 time: 1.0403 data_time: 0.0097 memory: 16130 loss: 0.0065 tflops: 6.4550 tokens_per_sec: 106.7045 +2025/09/25 21:18:12 - mmengine - INFO - Iter(train) [2360/7452] base_lr: 1.9366e-06 lr: 1.9366e-06 eta: 1:43:20 time: 0.9974 data_time: 0.0081 memory: 16130 loss: 1.7812 tflops: 4.0612 tokens_per_sec: 67.1738 +2025/09/25 21:18:23 - mmengine - INFO - Iter(train) [2370/7452] base_lr: 1.9358e-06 lr: 1.9358e-06 eta: 1:43:06 time: 1.0227 data_time: 0.0095 memory: 16131 loss: 0.0082 tflops: 5.8552 tokens_per_sec: 96.8051 +2025/09/25 21:18:34 - mmengine - INFO - Iter(train) [2380/7452] base_lr: 1.9350e-06 lr: 1.9350e-06 eta: 1:42:51 time: 1.0103 data_time: 0.0088 memory: 16131 loss: 0.2754 tflops: 6.4662 tokens_per_sec: 106.8940 +2025/09/25 21:18:45 - mmengine - INFO - Iter(train) [2390/7452] base_lr: 1.9342e-06 lr: 1.9342e-06 eta: 1:42:36 time: 1.0303 data_time: 0.0090 memory: 16130 loss: 0.0065 tflops: 6.4588 tokens_per_sec: 106.7691 +2025/09/25 21:18:56 - mmengine - INFO - Iter(train) [2400/7452] base_lr: 1.9334e-06 lr: 1.9334e-06 eta: 1:42:21 time: 1.2150 data_time: 0.0084 memory: 16131 loss: 0.0100 tflops: 5.8754 tokens_per_sec: 97.1154 +2025/09/25 21:19:07 - mmengine - INFO - Iter(train) [2410/7452] base_lr: 1.9326e-06 lr: 1.9326e-06 eta: 1:42:06 
time: 1.2605 data_time: 0.0083 memory: 16131 loss: 2.0469 tflops: 4.6545 tokens_per_sec: 76.9558 +2025/09/25 21:19:18 - mmengine - INFO - Iter(train) [2420/7452] base_lr: 1.9318e-06 lr: 1.9318e-06 eta: 1:41:51 time: 1.0331 data_time: 0.0084 memory: 16131 loss: 2.2500 tflops: 6.9689 tokens_per_sec: 115.1872 +2025/09/25 21:19:29 - mmengine - INFO - Iter(train) [2430/7452] base_lr: 1.9310e-06 lr: 1.9310e-06 eta: 1:41:37 time: 1.0607 data_time: 0.0083 memory: 16130 loss: 1.7734 tflops: 5.3599 tokens_per_sec: 88.6216 +2025/09/25 21:19:40 - mmengine - INFO - Iter(train) [2440/7452] base_lr: 1.9302e-06 lr: 1.9302e-06 eta: 1:41:22 time: 1.0250 data_time: 0.0085 memory: 16131 loss: 0.0216 tflops: 6.5508 tokens_per_sec: 108.2888 +2025/09/25 21:19:51 - mmengine - INFO - Iter(train) [2450/7452] base_lr: 1.9294e-06 lr: 1.9294e-06 eta: 1:41:09 time: 1.3772 data_time: 0.0086 memory: 16130 loss: 2.0469 tflops: 3.1171 tokens_per_sec: 51.5557 +2025/09/25 21:20:02 - mmengine - INFO - Iter(train) [2460/7452] base_lr: 1.9286e-06 lr: 1.9286e-06 eta: 1:40:54 time: 1.2124 data_time: 0.0081 memory: 16130 loss: 0.0023 tflops: 5.3884 tokens_per_sec: 89.0772 +2025/09/25 21:20:13 - mmengine - INFO - Iter(train) [2470/7452] base_lr: 1.9277e-06 lr: 1.9277e-06 eta: 1:40:40 time: 1.0254 data_time: 0.0086 memory: 16131 loss: 2.9531 tflops: 3.0653 tokens_per_sec: 50.7107 +2025/09/25 21:20:24 - mmengine - INFO - Iter(train) [2480/7452] base_lr: 1.9269e-06 lr: 1.9269e-06 eta: 1:40:25 time: 1.0159 data_time: 0.0089 memory: 16131 loss: 0.0012 tflops: 7.7433 tokens_per_sec: 127.9698 +2025/09/25 21:20:35 - mmengine - INFO - Iter(train) [2490/7452] base_lr: 1.9261e-06 lr: 1.9261e-06 eta: 1:40:10 time: 1.0280 data_time: 0.0084 memory: 16131 loss: 1.7031 tflops: 4.6470 tokens_per_sec: 76.8501 +2025/09/25 21:20:45 - mmengine - INFO - Iter(train) [2500/7452] base_lr: 1.9252e-06 lr: 1.9252e-06 eta: 1:39:55 time: 1.0084 data_time: 0.0083 memory: 16131 loss: 1.8984 tflops: 4.6774 tokens_per_sec: 77.3536 
+2025/09/25 21:20:56 - mmengine - INFO - Iter(train) [2510/7452] base_lr: 1.9244e-06 lr: 1.9244e-06 eta: 1:39:41 time: 1.0338 data_time: 0.0081 memory: 16130 loss: 0.0581 tflops: 6.7298 tokens_per_sec: 111.2416 +2025/09/25 21:21:07 - mmengine - INFO - Iter(train) [2520/7452] base_lr: 1.9235e-06 lr: 1.9235e-06 eta: 1:39:27 time: 1.0076 data_time: 0.0084 memory: 16131 loss: 1.8750 tflops: 4.5006 tokens_per_sec: 74.4327 +2025/09/25 21:21:18 - mmengine - INFO - Iter(train) [2530/7452] base_lr: 1.9227e-06 lr: 1.9227e-06 eta: 1:39:11 time: 1.0062 data_time: 0.0086 memory: 16130 loss: 2.4219 tflops: 7.0344 tokens_per_sec: 116.2741 +2025/09/25 21:21:29 - mmengine - INFO - Iter(train) [2540/7452] base_lr: 1.9218e-06 lr: 1.9218e-06 eta: 1:38:57 time: 1.0164 data_time: 0.0083 memory: 16130 loss: 2.1094 tflops: 6.3086 tokens_per_sec: 104.2911 +2025/09/25 21:21:40 - mmengine - INFO - Iter(train) [2550/7452] base_lr: 1.9210e-06 lr: 1.9210e-06 eta: 1:38:43 time: 1.0157 data_time: 0.0082 memory: 16131 loss: 0.0308 tflops: 6.5515 tokens_per_sec: 108.3009 +2025/09/25 21:21:50 - mmengine - INFO - Iter(train) [2560/7452] base_lr: 1.9201e-06 lr: 1.9201e-06 eta: 1:38:28 time: 1.3182 data_time: 0.0082 memory: 16131 loss: 0.0012 tflops: 4.9560 tokens_per_sec: 81.9288 +2025/09/25 21:21:51 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 21:21:51 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:21:52 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern within the slide.<|im_end|> + +2025/09/25 21:21:52 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:22:04 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the biopsy specimen is a 1.5 cm core of tissue obtained from the right breast. The report notes the presence of invasive ductal carcinoma, which is a type of breast cancer. The carcinoma is described as being of the triple negative subtype, meaning it does not express estrogen receptors, progesterone receptors, or HER2/neu receptors. The report also mentions the presence of ductal carcinoma in situ (DCIS), which is a non-invasive form of breast cancer. The report further describes the presence of microcalcifications, which are small deposits of calcium within the breast tissue. The report concludes by stating that the biopsy specimen is consistent with invasive ductal carcinoma of the right breast.<|im_end|> + +2025/09/25 21:22:04 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:22:05 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 21:22:53 - mmengine - INFO - Iter(train) [2570/7452] base_lr: 1.9192e-06 lr: 1.9192e-06 eta: 1:39:51 time: 1.0304 data_time: 0.0083 memory: 16130 loss: 1.6328 tflops: 4.4599 tokens_per_sec: 73.7588 +2025/09/25 21:23:04 - mmengine - INFO - Iter(train) [2580/7452] base_lr: 1.9183e-06 lr: 1.9183e-06 eta: 1:39:37 time: 1.0408 data_time: 0.0089 memory: 16130 loss: 2.2344 tflops: 7.0922 tokens_per_sec: 117.2208 +2025/09/25 21:23:15 - mmengine - INFO - Iter(train) [2590/7452] base_lr: 1.9175e-06 lr: 1.9175e-06 eta: 1:39:22 time: 1.0308 data_time: 0.0077 memory: 16130 loss: 2.2500 tflops: 6.1029 tokens_per_sec: 100.8944 +2025/09/25 21:23:26 - mmengine - INFO - Iter(train) [2600/7452] base_lr: 1.9166e-06 lr: 1.9166e-06 eta: 1:39:07 time: 1.0442 data_time: 0.0086 memory: 16131 loss: 1.8203 tflops: 4.1690 tokens_per_sec: 68.9520 +2025/09/25 21:23:36 - mmengine - INFO - Iter(train) [2610/7452] base_lr: 1.9157e-06 lr: 1.9157e-06 eta: 1:38:52 time: 1.2559 data_time: 0.0086 memory: 16131 loss: 2.2812 tflops: 6.1186 tokens_per_sec: 101.1225 +2025/09/25 21:23:47 - mmengine - INFO - Iter(train) [2620/7452] base_lr: 1.9148e-06 lr: 1.9148e-06 eta: 1:38:37 time: 1.0055 data_time: 0.0085 memory: 16131 loss: 0.0121 tflops: 5.3528 tokens_per_sec: 88.5110 +2025/09/25 21:23:58 - mmengine - INFO - Iter(train) [2630/7452] base_lr: 1.9139e-06 lr: 1.9139e-06 eta: 1:38:22 time: 1.0259 data_time: 0.0083 memory: 16131 loss: 0.1650 tflops: 7.3720 tokens_per_sec: 121.8416 +2025/09/25 21:24:09 - mmengine - INFO - Iter(train) [2640/7452] base_lr: 1.9130e-06 lr: 1.9130e-06 eta: 1:38:08 time: 1.0275 data_time: 0.0084 memory: 16131 loss: 0.0128 tflops: 6.2996 tokens_per_sec: 104.1408 +2025/09/25 21:24:20 - 
mmengine - INFO - Iter(train) [2650/7452] base_lr: 1.9121e-06 lr: 1.9121e-06 eta: 1:37:53 time: 1.0133 data_time: 0.0084 memory: 16131 loss: 1.7422 tflops: 4.1766 tokens_per_sec: 69.0793 +2025/09/25 21:24:30 - mmengine - INFO - Iter(train) [2660/7452] base_lr: 1.9112e-06 lr: 1.9112e-06 eta: 1:37:38 time: 1.0383 data_time: 0.0085 memory: 16131 loss: 0.0271 tflops: 6.7591 tokens_per_sec: 111.7251 +2025/09/25 21:24:41 - mmengine - INFO - Iter(train) [2670/7452] base_lr: 1.9103e-06 lr: 1.9103e-06 eta: 1:37:23 time: 1.2975 data_time: 0.0081 memory: 16130 loss: 2.1406 tflops: 5.5488 tokens_per_sec: 91.7145 +2025/09/25 21:24:52 - mmengine - INFO - Iter(train) [2680/7452] base_lr: 1.9093e-06 lr: 1.9093e-06 eta: 1:37:09 time: 1.0025 data_time: 0.0072 memory: 16131 loss: 0.1143 tflops: 7.1211 tokens_per_sec: 117.7042 +2025/09/25 21:25:03 - mmengine - INFO - Iter(train) [2690/7452] base_lr: 1.9084e-06 lr: 1.9084e-06 eta: 1:36:54 time: 1.0243 data_time: 0.0083 memory: 16131 loss: 0.0008 tflops: 7.6797 tokens_per_sec: 126.9178 +2025/09/25 21:25:14 - mmengine - INFO - Iter(train) [2700/7452] base_lr: 1.9075e-06 lr: 1.9075e-06 eta: 1:36:39 time: 1.0436 data_time: 0.0082 memory: 16131 loss: 0.0109 tflops: 6.4341 tokens_per_sec: 106.3594 +2025/09/25 21:25:25 - mmengine - INFO - Iter(train) [2710/7452] base_lr: 1.9065e-06 lr: 1.9065e-06 eta: 1:36:25 time: 1.2311 data_time: 0.0084 memory: 16131 loss: 0.0058 tflops: 6.0451 tokens_per_sec: 99.9124 +2025/09/25 21:25:35 - mmengine - INFO - Iter(train) [2720/7452] base_lr: 1.9056e-06 lr: 1.9056e-06 eta: 1:36:09 time: 1.0236 data_time: 0.0083 memory: 16131 loss: 1.9922 tflops: 3.8980 tokens_per_sec: 64.4753 +2025/09/25 21:25:46 - mmengine - INFO - Iter(train) [2730/7452] base_lr: 1.9047e-06 lr: 1.9047e-06 eta: 1:35:55 time: 1.0770 data_time: 0.0086 memory: 16130 loss: 0.0012 tflops: 6.2345 tokens_per_sec: 103.0594 +2025/09/25 21:25:57 - mmengine - INFO - Iter(train) [2740/7452] base_lr: 1.9037e-06 lr: 1.9037e-06 eta: 1:35:40 time: 1.0123 
data_time: 0.0087 memory: 16131 loss: 0.0820 tflops: 6.5138 tokens_per_sec: 107.6790 +2025/09/25 21:26:08 - mmengine - INFO - Iter(train) [2750/7452] base_lr: 1.9028e-06 lr: 1.9028e-06 eta: 1:35:26 time: 1.1433 data_time: 0.0088 memory: 16131 loss: 0.0393 tflops: 6.4030 tokens_per_sec: 105.8311 +2025/09/25 21:26:20 - mmengine - INFO - Iter(train) [2760/7452] base_lr: 1.9018e-06 lr: 1.9018e-06 eta: 1:35:13 time: 1.1033 data_time: 0.0084 memory: 16130 loss: 0.0161 tflops: 6.5253 tokens_per_sec: 107.8556 +2025/09/25 21:26:31 - mmengine - INFO - Iter(train) [2770/7452] base_lr: 1.9008e-06 lr: 1.9008e-06 eta: 1:35:00 time: 0.9975 data_time: 0.0081 memory: 16133 loss: 2.1094 tflops: 5.7602 tokens_per_sec: 95.2399 +2025/09/25 21:26:41 - mmengine - INFO - Iter(train) [2780/7452] base_lr: 1.8999e-06 lr: 1.8999e-06 eta: 1:34:44 time: 1.0080 data_time: 0.0083 memory: 16131 loss: 0.0520 tflops: 6.4812 tokens_per_sec: 107.1429 +2025/09/25 21:26:52 - mmengine - INFO - Iter(train) [2790/7452] base_lr: 1.8989e-06 lr: 1.8989e-06 eta: 1:34:30 time: 1.0035 data_time: 0.0088 memory: 16131 loss: 1.9297 tflops: 7.8992 tokens_per_sec: 130.5438 +2025/09/25 21:27:03 - mmengine - INFO - Iter(train) [2800/7452] base_lr: 1.8979e-06 lr: 1.8979e-06 eta: 1:34:16 time: 1.0247 data_time: 0.0088 memory: 16130 loss: 0.1357 tflops: 7.3220 tokens_per_sec: 121.0156 +2025/09/25 21:27:14 - mmengine - INFO - Iter(train) [2810/7452] base_lr: 1.8970e-06 lr: 1.8970e-06 eta: 1:34:01 time: 1.0198 data_time: 0.0090 memory: 16131 loss: 2.5312 tflops: 5.0406 tokens_per_sec: 83.3537 +2025/09/25 21:27:25 - mmengine - INFO - Iter(train) [2820/7452] base_lr: 1.8960e-06 lr: 1.8960e-06 eta: 1:33:47 time: 1.0418 data_time: 0.0087 memory: 16130 loss: 0.0011 tflops: 6.9110 tokens_per_sec: 114.2302 +2025/09/25 21:27:36 - mmengine - INFO - Iter(train) [2830/7452] base_lr: 1.8950e-06 lr: 1.8950e-06 eta: 1:33:34 time: 1.2218 data_time: 0.0087 memory: 16131 loss: 0.0386 tflops: 5.7436 tokens_per_sec: 94.9385 +2025/09/25 
21:27:46 - mmengine - INFO - Iter(train) [2840/7452] base_lr: 1.8940e-06 lr: 1.8940e-06 eta: 1:33:19 time: 1.0130 data_time: 0.0090 memory: 16131 loss: 2.3438 tflops: 9.8605 tokens_per_sec: 162.8826 +2025/09/25 21:27:57 - mmengine - INFO - Iter(train) [2850/7452] base_lr: 1.8930e-06 lr: 1.8930e-06 eta: 1:33:04 time: 1.2717 data_time: 0.0084 memory: 16131 loss: 2.0938 tflops: 4.9468 tokens_per_sec: 81.7811 +2025/09/25 21:28:08 - mmengine - INFO - Iter(train) [2860/7452] base_lr: 1.8920e-06 lr: 1.8920e-06 eta: 1:32:50 time: 1.0067 data_time: 0.0086 memory: 16131 loss: 0.0737 tflops: 6.4894 tokens_per_sec: 107.2781 +2025/09/25 21:28:18 - mmengine - INFO - Iter(train) [2870/7452] base_lr: 1.8910e-06 lr: 1.8910e-06 eta: 1:32:35 time: 1.0198 data_time: 0.0083 memory: 16130 loss: 0.1982 tflops: 6.8224 tokens_per_sec: 112.7717 +2025/09/25 21:28:30 - mmengine - INFO - Iter(train) [2880/7452] base_lr: 1.8900e-06 lr: 1.8900e-06 eta: 1:32:21 time: 1.3319 data_time: 0.0084 memory: 16131 loss: 2.5312 tflops: 3.3140 tokens_per_sec: 54.8100 +2025/09/25 21:28:40 - mmengine - INFO - Iter(train) [2890/7452] base_lr: 1.8890e-06 lr: 1.8890e-06 eta: 1:32:07 time: 1.0045 data_time: 0.0086 memory: 16130 loss: 0.0583 tflops: 6.7453 tokens_per_sec: 111.5024 +2025/09/25 21:28:51 - mmengine - INFO - Iter(train) [2900/7452] base_lr: 1.8879e-06 lr: 1.8879e-06 eta: 1:31:52 time: 1.0080 data_time: 0.0080 memory: 16130 loss: 1.6250 tflops: 3.8385 tokens_per_sec: 63.4925 +2025/09/25 21:29:02 - mmengine - INFO - Iter(train) [2910/7452] base_lr: 1.8869e-06 lr: 1.8869e-06 eta: 1:31:39 time: 1.2312 data_time: 0.0087 memory: 16131 loss: 1.6484 tflops: 2.9952 tokens_per_sec: 49.5446 +2025/09/25 21:29:13 - mmengine - INFO - Iter(train) [2920/7452] base_lr: 1.8859e-06 lr: 1.8859e-06 eta: 1:31:25 time: 1.0147 data_time: 0.0083 memory: 16130 loss: 0.0051 tflops: 7.3341 tokens_per_sec: 121.2171 +2025/09/25 21:29:23 - mmengine - INFO - Iter(train) [2930/7452] base_lr: 1.8849e-06 lr: 1.8849e-06 eta: 1:31:10 
time: 1.2807 data_time: 0.0082 memory: 16131 loss: 0.0009 tflops: 5.9528 tokens_per_sec: 98.3838 +2025/09/25 21:29:34 - mmengine - INFO - Iter(train) [2940/7452] base_lr: 1.8838e-06 lr: 1.8838e-06 eta: 1:30:56 time: 1.0052 data_time: 0.0083 memory: 16131 loss: 0.2637 tflops: 6.1375 tokens_per_sec: 101.4681 +2025/09/25 21:29:45 - mmengine - INFO - Iter(train) [2950/7452] base_lr: 1.8828e-06 lr: 1.8828e-06 eta: 1:30:43 time: 1.0202 data_time: 0.0084 memory: 16131 loss: 0.0047 tflops: 7.2353 tokens_per_sec: 119.5870 +2025/09/25 21:29:56 - mmengine - INFO - Iter(train) [2960/7452] base_lr: 1.8817e-06 lr: 1.8817e-06 eta: 1:30:28 time: 1.0419 data_time: 0.0086 memory: 16131 loss: 0.0009 tflops: 6.6772 tokens_per_sec: 110.3725 +2025/09/25 21:30:07 - mmengine - INFO - Iter(train) [2970/7452] base_lr: 1.8807e-06 lr: 1.8807e-06 eta: 1:30:15 time: 0.9828 data_time: 0.0076 memory: 16131 loss: 1.6094 tflops: 7.6952 tokens_per_sec: 127.1821 +2025/09/25 21:30:18 - mmengine - INFO - Iter(train) [2980/7452] base_lr: 1.8796e-06 lr: 1.8796e-06 eta: 1:30:01 time: 1.0330 data_time: 0.0085 memory: 16131 loss: 2.6562 tflops: 5.2688 tokens_per_sec: 87.1209 +2025/09/25 21:30:28 - mmengine - INFO - Iter(train) [2990/7452] base_lr: 1.8786e-06 lr: 1.8786e-06 eta: 1:29:47 time: 1.0013 data_time: 0.0087 memory: 16131 loss: 1.2578 tflops: 3.8038 tokens_per_sec: 62.9185 +2025/09/25 21:30:39 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 21:30:39 - mmengine - INFO - Iter(train) [3000/7452] base_lr: 1.8775e-06 lr: 1.8775e-06 eta: 1:29:33 time: 0.9909 data_time: 0.0083 memory: 16131 loss: 1.4922 tflops: 6.1654 tokens_per_sec: 101.9309 +2025/09/25 21:30:50 - mmengine - INFO - Iter(train) [3010/7452] base_lr: 1.8765e-06 lr: 1.8765e-06 eta: 1:29:19 time: 0.9925 data_time: 0.0083 memory: 16131 loss: 0.0096 tflops: 7.8645 tokens_per_sec: 129.9735 +2025/09/25 21:31:01 - mmengine - INFO - Iter(train) [3020/7452] base_lr: 1.8754e-06 lr: 1.8754e-06 eta: 1:29:05 time: 1.3164 
data_time: 0.0086 memory: 16130 loss: 0.1660 tflops: 5.1930 tokens_per_sec: 85.8405 +2025/09/25 21:31:12 - mmengine - INFO - Iter(train) [3030/7452] base_lr: 1.8743e-06 lr: 1.8743e-06 eta: 1:28:51 time: 1.1983 data_time: 0.0083 memory: 16131 loss: 0.0364 tflops: 6.6655 tokens_per_sec: 110.1540 +2025/09/25 21:31:23 - mmengine - INFO - Iter(train) [3040/7452] base_lr: 1.8732e-06 lr: 1.8732e-06 eta: 1:28:37 time: 1.0051 data_time: 0.0084 memory: 16131 loss: 0.0005 tflops: 8.1879 tokens_per_sec: 135.3054 +2025/09/25 21:31:34 - mmengine - INFO - Iter(train) [3050/7452] base_lr: 1.8722e-06 lr: 1.8722e-06 eta: 1:28:24 time: 1.2688 data_time: 0.0087 memory: 16131 loss: 1.1953 tflops: 4.7195 tokens_per_sec: 78.0285 +2025/09/25 21:31:45 - mmengine - INFO - Iter(train) [3060/7452] base_lr: 1.8711e-06 lr: 1.8711e-06 eta: 1:28:10 time: 1.0076 data_time: 0.0087 memory: 16131 loss: 2.0625 tflops: 5.9428 tokens_per_sec: 98.2536 +2025/09/25 21:31:55 - mmengine - INFO - Iter(train) [3070/7452] base_lr: 1.8700e-06 lr: 1.8700e-06 eta: 1:27:56 time: 1.0178 data_time: 0.0080 memory: 16131 loss: 0.1050 tflops: 6.1210 tokens_per_sec: 101.1949 +2025/09/25 21:31:57 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 21:31:57 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:31:59 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern.<|im_end|> + +2025/09/25 21:31:59 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:32:14 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the biopsy specimen is a 1.5 cm core of tissue obtained from the right breast. The report notes the presence of invasive ductal carcinoma, which is a type of breast cancer. The tumor is described as being well differentiated, meaning it is less aggressive and has a better prognosis compared to poorly differentiated tumors. The report also mentions the presence of lymphovascular invasion, which suggests that the cancer cells have entered the lymphatic vessels, potentially indicating a higher risk of metastasis. Additionally, the report notes the presence of necrosis, which is the death of cancer cells and can be a sign of more aggressive disease. The report also mentions the presence of tumor cells in the stroma, which is the connective tissue surrounding the tumor. 
The report concludes by stating that the tumor is 1.5 cm in size and is located in the upper outer quadrant of the right breast.<|im_end|> + +2025/09/25 21:32:14 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:32:15 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/25 21:32:28 - mmengine - INFO - Iter(train) [3080/7452] base_lr: 1.8689e-06 lr: 1.8689e-06 eta: 1:28:13 time: 0.9984 data_time: 0.0085 memory: 16130 loss: 0.0061 tflops: 6.9078 tokens_per_sec: 114.1854 +2025/09/25 21:32:39 - mmengine - INFO - Iter(train) [3090/7452] base_lr: 1.8678e-06 lr: 1.8678e-06 eta: 1:27:59 time: 1.0217 data_time: 0.0089 memory: 16130 loss: 0.0061 tflops: 6.7498 tokens_per_sec: 111.5745 +2025/09/25 21:32:50 - mmengine - INFO - Iter(train) [3100/7452] base_lr: 1.8667e-06 lr: 1.8667e-06 eta: 1:27:46 time: 1.0127 data_time: 0.0085 memory: 16131 loss: 1.2500 tflops: 5.9727 tokens_per_sec: 98.7465 +2025/09/25 21:33:01 - mmengine - INFO - Iter(train) [3110/7452] base_lr: 1.8656e-06 lr: 1.8656e-06 eta: 1:27:32 time: 1.1207 data_time: 0.0084 memory: 16131 loss: 2.6562 tflops: 5.3433 tokens_per_sec: 88.3415 +2025/09/25 21:33:12 - mmengine - INFO - Iter(train) [3120/7452] base_lr: 1.8644e-06 lr: 1.8644e-06 eta: 1:27:19 time: 1.0989 data_time: 0.0075 memory: 16131 loss: 1.6016 tflops: 4.9532 tokens_per_sec: 81.9016 +2025/09/25 21:33:23 - mmengine - INFO - Iter(train) [3130/7452] base_lr: 1.8633e-06 lr: 1.8633e-06 eta: 1:27:06 time: 1.0892 data_time: 0.0086 memory: 16131 loss: 
0.0028 tflops: 7.2219 tokens_per_sec: 119.3526 +2025/09/25 21:33:34 - mmengine - INFO - Iter(train) [3140/7452] base_lr: 1.8622e-06 lr: 1.8622e-06 eta: 1:26:52 time: 1.0166 data_time: 0.0084 memory: 16130 loss: 0.0415 tflops: 6.1282 tokens_per_sec: 101.3137 +2025/09/25 21:33:45 - mmengine - INFO - Iter(train) [3150/7452] base_lr: 1.8611e-06 lr: 1.8611e-06 eta: 1:26:39 time: 1.2294 data_time: 0.0087 memory: 16132 loss: 0.0119 tflops: 5.9052 tokens_per_sec: 97.6052 +2025/09/25 21:33:56 - mmengine - INFO - Iter(train) [3160/7452] base_lr: 1.8600e-06 lr: 1.8600e-06 eta: 1:26:24 time: 1.0177 data_time: 0.0083 memory: 16131 loss: 1.3125 tflops: 4.5155 tokens_per_sec: 74.6783 +2025/09/25 21:34:07 - mmengine - INFO - Iter(train) [3170/7452] base_lr: 1.8588e-06 lr: 1.8588e-06 eta: 1:26:11 time: 0.9914 data_time: 0.0084 memory: 16131 loss: 0.3789 tflops: 6.1009 tokens_per_sec: 100.8666 +2025/09/25 21:34:18 - mmengine - INFO - Iter(train) [3180/7452] base_lr: 1.8577e-06 lr: 1.8577e-06 eta: 1:25:57 time: 1.2132 data_time: 0.0082 memory: 16131 loss: 2.0312 tflops: 3.1891 tokens_per_sec: 52.7513 +2025/09/25 21:34:28 - mmengine - INFO - Iter(train) [3190/7452] base_lr: 1.8565e-06 lr: 1.8565e-06 eta: 1:25:43 time: 1.0169 data_time: 0.0084 memory: 16131 loss: 0.0444 tflops: 6.6034 tokens_per_sec: 109.1580 +2025/09/25 21:34:39 - mmengine - INFO - Iter(train) [3200/7452] base_lr: 1.8554e-06 lr: 1.8554e-06 eta: 1:25:29 time: 1.4616 data_time: 0.0091 memory: 16131 loss: 0.0064 tflops: 4.8844 tokens_per_sec: 80.7352 +2025/09/25 21:34:50 - mmengine - INFO - Iter(train) [3210/7452] base_lr: 1.8542e-06 lr: 1.8542e-06 eta: 1:25:16 time: 1.0110 data_time: 0.0085 memory: 16131 loss: 1.6719 tflops: 4.8447 tokens_per_sec: 80.1182 +2025/09/25 21:35:00 - mmengine - INFO - Iter(train) [3220/7452] base_lr: 1.8531e-06 lr: 1.8531e-06 eta: 1:25:01 time: 1.0362 data_time: 0.0083 memory: 16131 loss: 2.0938 tflops: 4.5515 tokens_per_sec: 75.2717 +2025/09/25 21:35:11 - mmengine - INFO - Iter(train) 
[3230/7452] base_lr: 1.8519e-06 lr: 1.8519e-06 eta: 1:24:48 time: 1.0029 data_time: 0.0081 memory: 16131 loss: 2.8906 tflops: 7.3601 tokens_per_sec: 121.6487 +2025/09/25 21:35:22 - mmengine - INFO - Iter(train) [3240/7452] base_lr: 1.8508e-06 lr: 1.8508e-06 eta: 1:24:34 time: 1.0209 data_time: 0.0084 memory: 16131 loss: 0.0593 tflops: 6.9927 tokens_per_sec: 115.5832 +2025/09/25 21:35:33 - mmengine - INFO - Iter(train) [3250/7452] base_lr: 1.8496e-06 lr: 1.8496e-06 eta: 1:24:20 time: 1.0035 data_time: 0.0086 memory: 16131 loss: 2.6406 tflops: 4.4590 tokens_per_sec: 73.7456 +2025/09/25 21:35:43 - mmengine - INFO - Iter(train) [3260/7452] base_lr: 1.8484e-06 lr: 1.8484e-06 eta: 1:24:06 time: 0.9976 data_time: 0.0082 memory: 16130 loss: 0.0110 tflops: 6.8527 tokens_per_sec: 113.2767 +2025/09/25 21:35:54 - mmengine - INFO - Iter(train) [3270/7452] base_lr: 1.8472e-06 lr: 1.8472e-06 eta: 1:23:53 time: 1.0169 data_time: 0.0084 memory: 16131 loss: 0.0008 tflops: 7.5563 tokens_per_sec: 124.8837 +2025/09/25 21:36:05 - mmengine - INFO - Iter(train) [3280/7452] base_lr: 1.8461e-06 lr: 1.8461e-06 eta: 1:23:39 time: 1.0128 data_time: 0.0088 memory: 16131 loss: 0.0140 tflops: 7.0486 tokens_per_sec: 116.5061 +2025/09/25 21:36:15 - mmengine - INFO - Iter(train) [3290/7452] base_lr: 1.8449e-06 lr: 1.8449e-06 eta: 1:23:25 time: 1.0192 data_time: 0.0086 memory: 16131 loss: 0.0031 tflops: 6.5289 tokens_per_sec: 107.9283 +2025/09/25 21:36:26 - mmengine - INFO - Iter(train) [3300/7452] base_lr: 1.8437e-06 lr: 1.8437e-06 eta: 1:23:12 time: 1.0143 data_time: 0.0083 memory: 16131 loss: 2.4844 tflops: 4.5306 tokens_per_sec: 74.9283 +2025/09/25 21:36:37 - mmengine - INFO - Iter(train) [3310/7452] base_lr: 1.8425e-06 lr: 1.8425e-06 eta: 1:22:58 time: 1.2411 data_time: 0.0079 memory: 16130 loss: 0.0640 tflops: 5.1175 tokens_per_sec: 84.6016 +2025/09/25 21:36:47 - mmengine - INFO - Iter(train) [3320/7452] base_lr: 1.8413e-06 lr: 1.8413e-06 eta: 1:22:44 time: 1.0114 data_time: 0.0082 memory: 
16132 loss: 1.9375 tflops: 6.8189 tokens_per_sec: 112.7163 +2025/09/25 21:36:59 - mmengine - INFO - Iter(train) [3330/7452] base_lr: 1.8401e-06 lr: 1.8401e-06 eta: 1:22:31 time: 1.0695 data_time: 0.0138 memory: 16131 loss: 0.0269 tflops: 5.0328 tokens_per_sec: 83.2201 +2025/09/25 21:37:09 - mmengine - INFO - Iter(train) [3340/7452] base_lr: 1.8389e-06 lr: 1.8389e-06 eta: 1:22:17 time: 1.0425 data_time: 0.0131 memory: 16131 loss: 1.4062 tflops: 4.7565 tokens_per_sec: 78.6575 +2025/09/25 21:37:20 - mmengine - INFO - Iter(train) [3350/7452] base_lr: 1.8377e-06 lr: 1.8377e-06 eta: 1:22:03 time: 1.2348 data_time: 0.0080 memory: 16131 loss: 0.0273 tflops: 8.0403 tokens_per_sec: 132.8174 +2025/09/25 21:37:31 - mmengine - INFO - Iter(train) [3360/7452] base_lr: 1.8365e-06 lr: 1.8365e-06 eta: 1:21:50 time: 0.9930 data_time: 0.0082 memory: 16130 loss: 0.0026 tflops: 7.5552 tokens_per_sec: 124.8701 +2025/09/25 21:37:41 - mmengine - INFO - Iter(train) [3370/7452] base_lr: 1.8352e-06 lr: 1.8352e-06 eta: 1:21:36 time: 1.0328 data_time: 0.0086 memory: 16130 loss: 1.4297 tflops: 5.4459 tokens_per_sec: 90.0454 +2025/09/25 21:37:52 - mmengine - INFO - Iter(train) [3380/7452] base_lr: 1.8340e-06 lr: 1.8340e-06 eta: 1:21:23 time: 1.2011 data_time: 0.0086 memory: 16130 loss: 0.0008 tflops: 6.4985 tokens_per_sec: 107.3980 +2025/09/25 21:38:03 - mmengine - INFO - Iter(train) [3390/7452] base_lr: 1.8328e-06 lr: 1.8328e-06 eta: 1:21:10 time: 1.2981 data_time: 0.0087 memory: 16131 loss: 2.0312 tflops: 4.0063 tokens_per_sec: 66.2484 +2025/09/25 21:38:14 - mmengine - INFO - Iter(train) [3400/7452] base_lr: 1.8316e-06 lr: 1.8316e-06 eta: 1:20:57 time: 1.0252 data_time: 0.0084 memory: 16131 loss: 0.2129 tflops: 6.2541 tokens_per_sec: 103.3912 +2025/09/25 21:38:25 - mmengine - INFO - Iter(train) [3410/7452] base_lr: 1.8303e-06 lr: 1.8303e-06 eta: 1:20:43 time: 1.0167 data_time: 0.0085 memory: 16131 loss: 0.0096 tflops: 7.2600 tokens_per_sec: 119.9945 +2025/09/25 21:38:35 - mmengine - INFO - 
Iter(train) [3420/7452] base_lr: 1.8291e-06 lr: 1.8291e-06 eta: 1:20:30 time: 1.0247 data_time: 0.0086 memory: 16131 loss: 0.0454 tflops: 7.0849 tokens_per_sec: 117.1043 +2025/09/25 21:38:46 - mmengine - INFO - Iter(train) [3430/7452] base_lr: 1.8279e-06 lr: 1.8279e-06 eta: 1:20:16 time: 1.0873 data_time: 0.0087 memory: 16131 loss: 1.9297 tflops: 4.3936 tokens_per_sec: 72.6589 +2025/09/25 21:38:57 - mmengine - INFO - Iter(train) [3440/7452] base_lr: 1.8266e-06 lr: 1.8266e-06 eta: 1:20:03 time: 1.0179 data_time: 0.0080 memory: 16130 loss: 1.6172 tflops: 5.3474 tokens_per_sec: 88.4208 +2025/09/25 21:39:08 - mmengine - INFO - Iter(train) [3450/7452] base_lr: 1.8254e-06 lr: 1.8254e-06 eta: 1:19:49 time: 1.0142 data_time: 0.0087 memory: 16130 loss: 0.0312 tflops: 6.3821 tokens_per_sec: 105.5062 +2025/09/25 21:39:19 - mmengine - INFO - Iter(train) [3460/7452] base_lr: 1.8241e-06 lr: 1.8241e-06 eta: 1:19:36 time: 1.0411 data_time: 0.0084 memory: 16130 loss: 2.6562 tflops: 4.8793 tokens_per_sec: 80.6874 +2025/09/25 21:39:29 - mmengine - INFO - Iter(train) [3470/7452] base_lr: 1.8228e-06 lr: 1.8228e-06 eta: 1:19:23 time: 1.2178 data_time: 0.0081 memory: 16130 loss: 0.0649 tflops: 5.7625 tokens_per_sec: 95.2520 +2025/09/25 21:39:40 - mmengine - INFO - Iter(train) [3480/7452] base_lr: 1.8216e-06 lr: 1.8216e-06 eta: 1:19:09 time: 1.0098 data_time: 0.0081 memory: 16131 loss: 0.0010 tflops: 7.7302 tokens_per_sec: 127.7535 +2025/09/25 21:39:50 - mmengine - INFO - Iter(train) [3490/7452] base_lr: 1.8203e-06 lr: 1.8203e-06 eta: 1:18:56 time: 1.1859 data_time: 0.0081 memory: 16131 loss: 0.0008 tflops: 5.7134 tokens_per_sec: 94.4453 +2025/09/25 21:40:01 - mmengine - INFO - Iter(train) [3500/7452] base_lr: 1.8190e-06 lr: 1.8190e-06 eta: 1:18:42 time: 1.0556 data_time: 0.0085 memory: 16130 loss: 0.0219 tflops: 5.9595 tokens_per_sec: 98.5240 +2025/09/25 21:40:12 - mmengine - INFO - Iter(train) [3510/7452] base_lr: 1.8178e-06 lr: 1.8178e-06 eta: 1:18:29 time: 1.0319 data_time: 0.0082 
memory: 16131 loss: 1.6719 tflops: 5.2745 tokens_per_sec: 87.2143 +2025/09/25 21:40:23 - mmengine - INFO - Iter(train) [3520/7452] base_lr: 1.8165e-06 lr: 1.8165e-06 eta: 1:18:16 time: 1.2586 data_time: 0.0086 memory: 16131 loss: 0.0178 tflops: 4.9501 tokens_per_sec: 81.8364 +2025/09/25 21:40:34 - mmengine - INFO - Iter(train) [3530/7452] base_lr: 1.8152e-06 lr: 1.8152e-06 eta: 1:18:03 time: 1.0250 data_time: 0.0082 memory: 16131 loss: 0.1387 tflops: 6.2553 tokens_per_sec: 103.4111 +2025/09/25 21:40:45 - mmengine - INFO - Iter(train) [3540/7452] base_lr: 1.8139e-06 lr: 1.8139e-06 eta: 1:17:50 time: 1.2134 data_time: 0.0086 memory: 16131 loss: 2.1094 tflops: 3.4382 tokens_per_sec: 56.8664 +2025/09/25 21:40:55 - mmengine - INFO - Iter(train) [3550/7452] base_lr: 1.8126e-06 lr: 1.8126e-06 eta: 1:17:37 time: 1.0534 data_time: 0.0086 memory: 16131 loss: 1.7812 tflops: 4.5924 tokens_per_sec: 75.9462 +2025/09/25 21:41:06 - mmengine - INFO - Iter(train) [3560/7452] base_lr: 1.8113e-06 lr: 1.8113e-06 eta: 1:17:23 time: 1.0181 data_time: 0.0083 memory: 16131 loss: 1.7578 tflops: 4.1568 tokens_per_sec: 68.7523 +2025/09/25 21:41:17 - mmengine - INFO - Iter(train) [3570/7452] base_lr: 1.8100e-06 lr: 1.8100e-06 eta: 1:17:10 time: 0.9803 data_time: 0.0084 memory: 16130 loss: 0.1777 tflops: 5.9850 tokens_per_sec: 98.9537 +2025/09/25 21:41:28 - mmengine - INFO - Iter(train) [3580/7452] base_lr: 1.8087e-06 lr: 1.8087e-06 eta: 1:16:57 time: 1.0853 data_time: 0.0084 memory: 16131 loss: 1.8984 tflops: 6.8012 tokens_per_sec: 112.4116 +2025/09/25 21:41:32 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 21:41:32 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:42:04 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the lobules is lined by a single layer of cuboidal to columnar epithelial cells. The luminal surface is smooth and lacks any evidence of atypia or architectural distortion. The stromal tissue between the lobules is sparse and shows no evidence of fibrosis or inflammation. The basement membrane of the lobules is intact and shows no evidence of disruption or thickening. The tumor cells within the lobules are uniform in size and shape, with no evidence of pleomorphism or anisocytosis. The tumor cells are arranged in a single layer and show no evidence of interlobular invasion or stromal invasion. The tumor cells are well-differentiated and show no evidence of necrosis or mitotic activity. The tumor cells are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is lined by a single layer of cuboidal to columnar epithelial cells. The luminal surface is smooth and lacks any evidence of atypia or architectural distortion. The stromal tissue between the lobules is sparse and shows no evidence of fibrosis or inflammation. The basement membrane of the lobules is intact and shows no evidence of disruption or thickening. The tumor cells within the lobules are uniform in size and shape, with no evidence of pleomorphism or anisocytosis. 
The tumor cells are arranged in a single layer and show no evidence of interlobular invasion or stromal invasion. The tumor cells are well-differentiated and show no evidence of necrosis or mitotic activity.<|im_end|> + +2025/09/25 21:42:04 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:42:14 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the tissue sample is composed of a mixture of benign and malignant cells. The malignant cells are characterized by their irregular and pleomorphic nuclei, as well as the presence of mitotic figures. The report also notes the presence of necrosis, which is a significant finding in this case. Additionally, the report mentions the presence of lymphovascular invasion, which is a concerning feature. The report concludes by stating that the tumor is classified as invasive ductal carcinoma, and that it is estrogen receptor positive, progesterone receptor negative, and HER2 negative.<|im_end|> + +2025/09/25 21:42:14 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:42:15 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/25 21:42:40 - mmengine - INFO - Iter(train) [3590/7452] base_lr: 1.8074e-06 lr: 1.8074e-06 eta: 1:17:50 time: 1.0643 data_time: 0.0084 memory: 16130 loss: 0.0378 tflops: 5.8539 tokens_per_sec: 96.7783 +2025/09/25 21:42:51 - mmengine - INFO - Iter(train) [3600/7452] base_lr: 1.8061e-06 lr: 1.8061e-06 eta: 1:17:36 time: 1.0090 data_time: 0.0088 memory: 16131 loss: 2.0156 tflops: 3.2351 tokens_per_sec: 53.5184 +2025/09/25 21:43:01 - mmengine - INFO - Iter(train) [3610/7452] base_lr: 1.8048e-06 lr: 1.8048e-06 eta: 1:17:23 time: 1.0382 data_time: 0.0090 memory: 16131 loss: 1.5000 tflops: 4.3680 tokens_per_sec: 72.2402 +2025/09/25 21:43:12 - mmengine - INFO - Iter(train) [3620/7452] base_lr: 1.8035e-06 lr: 1.8035e-06 eta: 1:17:09 time: 1.0041 data_time: 0.0086 memory: 16130 loss: 1.2656 tflops: 4.2753 tokens_per_sec: 70.7108 +2025/09/25 21:43:23 - mmengine - INFO - Iter(train) [3630/7452] base_lr: 1.8022e-06 lr: 1.8022e-06 eta: 1:16:55 time: 1.0000 data_time: 0.0087 memory: 16131 loss: 2.4688 tflops: 5.3826 tokens_per_sec: 89.0033 +2025/09/25 21:43:33 - mmengine - INFO - Iter(train) [3640/7452] base_lr: 1.8008e-06 lr: 1.8008e-06 eta: 1:16:42 time: 1.2190 data_time: 0.0083 memory: 16131 loss: 0.0466 tflops: 6.3038 tokens_per_sec: 104.1838 +2025/09/25 21:43:44 - mmengine - INFO - Iter(train) [3650/7452] base_lr: 1.7995e-06 lr: 1.7995e-06 eta: 1:16:29 time: 0.9924 data_time: 0.0089 memory: 16130 loss: 0.0162 tflops: 6.6438 tokens_per_sec: 109.8295 +2025/09/25 21:43:55 - mmengine - INFO - Iter(train) [3660/7452] base_lr: 1.7982e-06 lr: 1.7982e-06 eta: 1:16:15 time: 1.0030 data_time: 0.0082 memory: 16131 loss: 0.0135 tflops: 6.0307 tokens_per_sec: 99.7050 +2025/09/25 21:44:06 - mmengine - INFO - 
Iter(train) [3670/7452] base_lr: 1.7968e-06 lr: 1.7968e-06 eta: 1:16:02 time: 1.0049 data_time: 0.0083 memory: 16131 loss: 0.0020 tflops: 7.6468 tokens_per_sec: 126.3790 +2025/09/25 21:44:16 - mmengine - INFO - Iter(train) [3680/7452] base_lr: 1.7955e-06 lr: 1.7955e-06 eta: 1:15:48 time: 1.0106 data_time: 0.0086 memory: 16130 loss: 0.1543 tflops: 6.4044 tokens_per_sec: 105.8746 +2025/09/25 21:44:27 - mmengine - INFO - Iter(train) [3690/7452] base_lr: 1.7941e-06 lr: 1.7941e-06 eta: 1:15:35 time: 1.0166 data_time: 0.0087 memory: 16131 loss: 2.0469 tflops: 5.2351 tokens_per_sec: 86.5660 +2025/09/25 21:44:38 - mmengine - INFO - Iter(train) [3700/7452] base_lr: 1.7928e-06 lr: 1.7928e-06 eta: 1:15:21 time: 1.0058 data_time: 0.0084 memory: 16131 loss: 0.0564 tflops: 7.9412 tokens_per_sec: 131.2363 +2025/09/25 21:44:49 - mmengine - INFO - Iter(train) [3710/7452] base_lr: 1.7914e-06 lr: 1.7914e-06 eta: 1:15:08 time: 1.0687 data_time: 0.0123 memory: 16130 loss: 1.4609 tflops: 4.8664 tokens_per_sec: 80.4706 +2025/09/25 21:44:59 - mmengine - INFO - Iter(train) [3720/7452] base_lr: 1.7901e-06 lr: 1.7901e-06 eta: 1:14:55 time: 1.0286 data_time: 0.0084 memory: 16131 loss: 1.2266 tflops: 4.4087 tokens_per_sec: 72.9138 +2025/09/25 21:45:10 - mmengine - INFO - Iter(train) [3730/7452] base_lr: 1.7887e-06 lr: 1.7887e-06 eta: 1:14:41 time: 1.2025 data_time: 0.0085 memory: 16131 loss: 1.5312 tflops: 4.2746 tokens_per_sec: 70.6858 +2025/09/25 21:45:21 - mmengine - INFO - Iter(train) [3740/7452] base_lr: 1.7873e-06 lr: 1.7873e-06 eta: 1:14:28 time: 1.0199 data_time: 0.0084 memory: 16131 loss: 1.9062 tflops: 4.2091 tokens_per_sec: 69.6152 +2025/09/25 21:45:32 - mmengine - INFO - Iter(train) [3750/7452] base_lr: 1.7860e-06 lr: 1.7860e-06 eta: 1:14:15 time: 1.2109 data_time: 0.0084 memory: 16131 loss: 1.8672 tflops: 3.2453 tokens_per_sec: 53.6797 +2025/09/25 21:45:42 - mmengine - INFO - Iter(train) [3760/7452] base_lr: 1.7846e-06 lr: 1.7846e-06 eta: 1:14:02 time: 1.2429 data_time: 0.0084 
memory: 16131 loss: 2.2812 tflops: 4.8664 tokens_per_sec: 80.4557 +2025/09/25 21:45:53 - mmengine - INFO - Iter(train) [3770/7452] base_lr: 1.7832e-06 lr: 1.7832e-06 eta: 1:13:48 time: 1.2882 data_time: 0.0086 memory: 16131 loss: 0.0598 tflops: 6.0124 tokens_per_sec: 99.3663 +2025/09/25 21:46:04 - mmengine - INFO - Iter(train) [3780/7452] base_lr: 1.7818e-06 lr: 1.7818e-06 eta: 1:13:35 time: 1.0258 data_time: 0.0082 memory: 16131 loss: 1.5078 tflops: 5.4242 tokens_per_sec: 89.6872 +2025/09/25 21:46:15 - mmengine - INFO - Iter(train) [3790/7452] base_lr: 1.7805e-06 lr: 1.7805e-06 eta: 1:13:22 time: 1.2771 data_time: 0.0085 memory: 16131 loss: 0.0039 tflops: 5.0683 tokens_per_sec: 83.7858 +2025/09/25 21:46:26 - mmengine - INFO - Iter(train) [3800/7452] base_lr: 1.7791e-06 lr: 1.7791e-06 eta: 1:13:09 time: 1.0789 data_time: 0.0104 memory: 16131 loss: 1.5469 tflops: 4.7641 tokens_per_sec: 78.7804 +2025/09/25 21:46:37 - mmengine - INFO - Iter(train) [3810/7452] base_lr: 1.7777e-06 lr: 1.7777e-06 eta: 1:12:56 time: 1.0620 data_time: 0.0133 memory: 16130 loss: 1.9688 tflops: 5.4104 tokens_per_sec: 89.4562 +2025/09/25 21:46:48 - mmengine - INFO - Iter(train) [3820/7452] base_lr: 1.7763e-06 lr: 1.7763e-06 eta: 1:12:43 time: 1.2222 data_time: 0.0082 memory: 16131 loss: 2.1250 tflops: 3.4133 tokens_per_sec: 56.4560 +2025/09/25 21:46:58 - mmengine - INFO - Iter(train) [3830/7452] base_lr: 1.7749e-06 lr: 1.7749e-06 eta: 1:12:29 time: 1.0181 data_time: 0.0081 memory: 16131 loss: 1.3594 tflops: 4.9300 tokens_per_sec: 81.5256 +2025/09/25 21:47:09 - mmengine - INFO - Iter(train) [3840/7452] base_lr: 1.7735e-06 lr: 1.7735e-06 eta: 1:12:16 time: 1.2726 data_time: 0.0083 memory: 16130 loss: 0.0452 tflops: 4.5151 tokens_per_sec: 74.6527 +2025/09/25 21:47:20 - mmengine - INFO - Iter(train) [3850/7452] base_lr: 1.7721e-06 lr: 1.7721e-06 eta: 1:12:03 time: 1.2451 data_time: 0.0092 memory: 16131 loss: 2.0938 tflops: 6.1717 tokens_per_sec: 102.0004 +2025/09/25 21:47:31 - mmengine - INFO - 
Iter(train) [3860/7452] base_lr: 1.7707e-06 lr: 1.7707e-06 eta: 1:11:50 time: 1.2468 data_time: 0.0090 memory: 16131 loss: 0.0058 tflops: 5.4342 tokens_per_sec: 89.8289 +2025/09/25 21:47:41 - mmengine - INFO - Iter(train) [3870/7452] base_lr: 1.7692e-06 lr: 1.7692e-06 eta: 1:11:36 time: 1.2190 data_time: 0.0088 memory: 16130 loss: 2.0625 tflops: 3.1740 tokens_per_sec: 52.5011 +2025/09/25 21:47:52 - mmengine - INFO - Iter(train) [3880/7452] base_lr: 1.7678e-06 lr: 1.7678e-06 eta: 1:11:24 time: 1.0368 data_time: 0.0087 memory: 16131 loss: 0.0009 tflops: 7.4701 tokens_per_sec: 123.4569 +2025/09/25 21:48:03 - mmengine - INFO - Iter(train) [3890/7452] base_lr: 1.7664e-06 lr: 1.7664e-06 eta: 1:11:11 time: 1.2287 data_time: 0.0088 memory: 16131 loss: 0.0013 tflops: 5.8596 tokens_per_sec: 96.8521 +2025/09/25 21:48:14 - mmengine - INFO - Iter(train) [3900/7452] base_lr: 1.7650e-06 lr: 1.7650e-06 eta: 1:10:57 time: 1.1815 data_time: 0.0101 memory: 16131 loss: 1.7656 tflops: 6.6066 tokens_per_sec: 109.1844 +2025/09/25 21:48:25 - mmengine - INFO - Iter(train) [3910/7452] base_lr: 1.7635e-06 lr: 1.7635e-06 eta: 1:10:45 time: 1.0096 data_time: 0.0088 memory: 16130 loss: 0.0149 tflops: 6.6512 tokens_per_sec: 109.9484 +2025/09/25 21:48:36 - mmengine - INFO - Iter(train) [3920/7452] base_lr: 1.7621e-06 lr: 1.7621e-06 eta: 1:10:32 time: 1.0184 data_time: 0.0085 memory: 16131 loss: 0.0476 tflops: 6.1769 tokens_per_sec: 102.1173 +2025/09/25 21:48:46 - mmengine - INFO - Iter(train) [3930/7452] base_lr: 1.7607e-06 lr: 1.7607e-06 eta: 1:10:18 time: 1.0167 data_time: 0.0084 memory: 16130 loss: 2.5469 tflops: 3.8651 tokens_per_sec: 63.9317 +2025/09/25 21:48:57 - mmengine - INFO - Iter(train) [3940/7452] base_lr: 1.7592e-06 lr: 1.7592e-06 eta: 1:10:06 time: 1.2946 data_time: 0.0084 memory: 16131 loss: 0.0562 tflops: 5.7017 tokens_per_sec: 94.2385 +2025/09/25 21:49:08 - mmengine - INFO - Iter(train) [3950/7452] base_lr: 1.7578e-06 lr: 1.7578e-06 eta: 1:09:52 time: 1.0267 data_time: 0.0085 
memory: 16130 loss: 2.6094 tflops: 5.8909 tokens_per_sec: 97.3950 +2025/09/25 21:49:18 - mmengine - INFO - Iter(train) [3960/7452] base_lr: 1.7563e-06 lr: 1.7563e-06 eta: 1:09:39 time: 1.2473 data_time: 0.0086 memory: 16131 loss: 2.2031 tflops: 3.9753 tokens_per_sec: 65.7396 +2025/09/25 21:49:30 - mmengine - INFO - Iter(train) [3970/7452] base_lr: 1.7549e-06 lr: 1.7549e-06 eta: 1:09:27 time: 1.0105 data_time: 0.0087 memory: 16130 loss: 1.1875 tflops: 4.1885 tokens_per_sec: 69.2755 +2025/09/25 21:49:40 - mmengine - INFO - Iter(train) [3980/7452] base_lr: 1.7534e-06 lr: 1.7534e-06 eta: 1:09:14 time: 1.0384 data_time: 0.0084 memory: 16131 loss: 1.9219 tflops: 4.3672 tokens_per_sec: 72.2262 +2025/09/25 21:49:51 - mmengine - INFO - Iter(train) [3990/7452] base_lr: 1.7520e-06 lr: 1.7520e-06 eta: 1:09:00 time: 1.0214 data_time: 0.0081 memory: 16131 loss: 0.0093 tflops: 6.6337 tokens_per_sec: 109.6581 +2025/09/25 21:50:02 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 21:50:02 - mmengine - INFO - Iter(train) [4000/7452] base_lr: 1.7505e-06 lr: 1.7505e-06 eta: 1:08:48 time: 1.3026 data_time: 0.0083 memory: 16131 loss: 0.1357 tflops: 5.3411 tokens_per_sec: 88.2861 +2025/09/25 21:50:13 - mmengine - INFO - Iter(train) [4010/7452] base_lr: 1.7490e-06 lr: 1.7490e-06 eta: 1:08:35 time: 1.0028 data_time: 0.0081 memory: 16131 loss: 0.0074 tflops: 7.7835 tokens_per_sec: 128.6345 +2025/09/25 21:50:23 - mmengine - INFO - Iter(train) [4020/7452] base_lr: 1.7475e-06 lr: 1.7475e-06 eta: 1:08:21 time: 1.0126 data_time: 0.0082 memory: 16130 loss: 0.0076 tflops: 6.4520 tokens_per_sec: 106.6591 +2025/09/25 21:50:34 - mmengine - INFO - Iter(train) [4030/7452] base_lr: 1.7461e-06 lr: 1.7461e-06 eta: 1:08:09 time: 1.2564 data_time: 0.0084 memory: 16131 loss: 0.0009 tflops: 5.4411 tokens_per_sec: 89.9419 +2025/09/25 21:50:45 - mmengine - INFO - Iter(train) [4040/7452] base_lr: 1.7446e-06 lr: 1.7446e-06 eta: 1:07:56 time: 1.0217 data_time: 0.0083 memory: 16131 
loss: 0.0381 tflops: 5.2683 tokens_per_sec: 87.1130 +2025/09/25 21:50:56 - mmengine - INFO - Iter(train) [4050/7452] base_lr: 1.7431e-06 lr: 1.7431e-06 eta: 1:07:43 time: 1.0146 data_time: 0.0082 memory: 16131 loss: 2.3594 tflops: 4.4698 tokens_per_sec: 73.9230 +2025/09/25 21:51:07 - mmengine - INFO - Iter(train) [4060/7452] base_lr: 1.7416e-06 lr: 1.7416e-06 eta: 1:07:30 time: 1.0133 data_time: 0.0081 memory: 16130 loss: 2.4375 tflops: 6.6867 tokens_per_sec: 110.5338 +2025/09/25 21:51:17 - mmengine - INFO - Iter(train) [4070/7452] base_lr: 1.7401e-06 lr: 1.7401e-06 eta: 1:07:17 time: 1.0125 data_time: 0.0083 memory: 16130 loss: 0.0082 tflops: 6.6316 tokens_per_sec: 109.6250 +2025/09/25 21:51:28 - mmengine - INFO - Iter(train) [4080/7452] base_lr: 1.7386e-06 lr: 1.7386e-06 eta: 1:07:04 time: 1.2075 data_time: 0.0084 memory: 16130 loss: 0.0374 tflops: 4.7582 tokens_per_sec: 78.6719 +2025/09/25 21:51:39 - mmengine - INFO - Iter(train) [4090/7452] base_lr: 1.7371e-06 lr: 1.7371e-06 eta: 1:06:51 time: 0.9800 data_time: 0.0082 memory: 16131 loss: 1.5625 tflops: 4.7511 tokens_per_sec: 78.5739 +2025/09/25 21:51:45 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 21:51:45 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:51:53 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is smooth and regular, with no evidence of necrosis or hemorrhage. 
The tumor cells are well-differentiated and show a cohesive growth pattern.<|im_end|> + +2025/09/25 21:51:53 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:52:03 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the tissue sample is composed of a mixture of benign and malignant cells. The malignant cells are characterized by their irregular nuclear morphology, hyperchromatic nuclei, and pleomorphic features. The benign cells exhibit a more uniform nuclear appearance. The report also notes the presence of necrosis, which is a significant finding as it suggests the potential for aggressive behavior. Additionally, the report mentions the presence of vascular invasion, indicating that the cancer has spread into the blood vessels, which is a concerning feature. The report concludes by stating that the overall assessment is of invasive ductal carcinoma, which is a type of breast cancer.<|im_end|> + +2025/09/25 21:52:03 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 21:52:04 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 21:52:04 - mmengine - INFO - Saving checkpoint at 4096 iterations +2025/09/25 21:53:30 - mmengine - INFO - Iter(train) [4100/7452] base_lr: 1.7356e-06 lr: 1.7356e-06 eta: 1:08:01 time: 1.0602 data_time: 0.0083 memory: 16131 loss: 0.0713 tflops: 6.3336 tokens_per_sec: 104.6983 +2025/09/25 21:53:40 - mmengine - INFO - Iter(train) [4110/7452] base_lr: 1.7341e-06 lr: 1.7341e-06 eta: 1:07:47 time: 1.0191 data_time: 0.0086 memory: 16131 loss: 1.5312 tflops: 3.6780 tokens_per_sec: 60.8388 +2025/09/25 21:53:51 - mmengine - INFO - Iter(train) [4120/7452] base_lr: 1.7326e-06 lr: 1.7326e-06 eta: 1:07:34 time: 1.0052 data_time: 0.0084 memory: 16131 loss: 0.0094 tflops: 5.5957 tokens_per_sec: 92.5224 +2025/09/25 21:54:02 - mmengine - INFO - Iter(train) [4130/7452] base_lr: 1.7311e-06 lr: 1.7311e-06 eta: 1:07:20 time: 1.0352 data_time: 0.0090 memory: 16131 loss: 0.0806 tflops: 6.5452 tokens_per_sec: 108.1946 +2025/09/25 21:54:13 - mmengine - INFO - Iter(train) [4140/7452] base_lr: 1.7296e-06 lr: 1.7296e-06 eta: 1:07:07 time: 1.0226 data_time: 0.0086 memory: 16131 loss: 0.0007 tflops: 7.6921 tokens_per_sec: 127.1224 +2025/09/25 21:54:24 - mmengine - INFO - Iter(train) [4150/7452] base_lr: 1.7281e-06 lr: 1.7281e-06 eta: 1:06:54 time: 1.0069 data_time: 0.0083 memory: 16131 loss: 0.0074 tflops: 6.3077 tokens_per_sec: 104.2781 +2025/09/25 21:54:34 - mmengine - INFO - Iter(train) [4160/7452] base_lr: 1.7266e-06 lr: 1.7266e-06 eta: 1:06:40 time: 1.2812 data_time: 0.0086 memory: 16131 loss: 0.0017 tflops: 5.4303 tokens_per_sec: 89.7613 +2025/09/25 21:54:45 - mmengine - INFO - Iter(train) [4170/7452] base_lr: 1.7250e-06 lr: 1.7250e-06 eta: 1:06:27 time: 1.0333 data_time: 0.0084 memory: 16131 
loss: 2.0625 tflops: 7.4956 tokens_per_sec: 123.8786 +2025/09/25 21:54:56 - mmengine - INFO - Iter(train) [4180/7452] base_lr: 1.7235e-06 lr: 1.7235e-06 eta: 1:06:14 time: 1.1725 data_time: 0.0083 memory: 16130 loss: 0.0150 tflops: 6.8125 tokens_per_sec: 112.5827 +2025/09/25 21:55:06 - mmengine - INFO - Iter(train) [4190/7452] base_lr: 1.7220e-06 lr: 1.7220e-06 eta: 1:06:00 time: 1.0034 data_time: 0.0079 memory: 16131 loss: 0.0757 tflops: 7.0545 tokens_per_sec: 116.6064 +2025/09/25 21:55:16 - mmengine - INFO - Iter(train) [4200/7452] base_lr: 1.7204e-06 lr: 1.7204e-06 eta: 1:05:47 time: 0.9935 data_time: 0.0088 memory: 16130 loss: 0.0056 tflops: 6.6368 tokens_per_sec: 109.7131 +2025/09/25 21:55:27 - mmengine - INFO - Iter(train) [4210/7452] base_lr: 1.7189e-06 lr: 1.7189e-06 eta: 1:05:34 time: 1.0220 data_time: 0.0085 memory: 16131 loss: 1.9844 tflops: 5.3260 tokens_per_sec: 88.0661 +2025/09/25 21:55:38 - mmengine - INFO - Iter(train) [4220/7452] base_lr: 1.7173e-06 lr: 1.7173e-06 eta: 1:05:20 time: 1.0236 data_time: 0.0088 memory: 16131 loss: 1.8672 tflops: 7.5075 tokens_per_sec: 124.0775 +2025/09/25 21:55:49 - mmengine - INFO - Iter(train) [4230/7452] base_lr: 1.7158e-06 lr: 1.7158e-06 eta: 1:05:07 time: 1.2350 data_time: 0.0088 memory: 16131 loss: 0.0027 tflops: 5.9767 tokens_per_sec: 98.7833 +2025/09/25 21:56:00 - mmengine - INFO - Iter(train) [4240/7452] base_lr: 1.7142e-06 lr: 1.7142e-06 eta: 1:04:54 time: 1.0691 data_time: 0.0124 memory: 16131 loss: 2.0000 tflops: 5.5444 tokens_per_sec: 91.6678 +2025/09/25 21:56:10 - mmengine - INFO - Iter(train) [4250/7452] base_lr: 1.7127e-06 lr: 1.7127e-06 eta: 1:04:41 time: 0.9948 data_time: 0.0083 memory: 16130 loss: 1.7344 tflops: 3.7679 tokens_per_sec: 62.3263 +2025/09/25 21:56:21 - mmengine - INFO - Iter(train) [4260/7452] base_lr: 1.7111e-06 lr: 1.7111e-06 eta: 1:04:28 time: 0.9993 data_time: 0.0074 memory: 16130 loss: 0.0309 tflops: 6.8405 tokens_per_sec: 113.0750 +2025/09/25 21:56:31 - mmengine - INFO - 
Iter(train) [4270/7452] base_lr: 1.7096e-06 lr: 1.7096e-06 eta: 1:04:14 time: 1.0132 data_time: 0.0085 memory: 16131 loss: 1.4219 tflops: 5.5513 tokens_per_sec: 91.7877 +2025/09/25 21:56:42 - mmengine - INFO - Iter(train) [4280/7452] base_lr: 1.7080e-06 lr: 1.7080e-06 eta: 1:04:01 time: 0.9968 data_time: 0.0084 memory: 16130 loss: 0.0027 tflops: 7.2226 tokens_per_sec: 119.3813 +2025/09/25 21:56:53 - mmengine - INFO - Iter(train) [4290/7452] base_lr: 1.7064e-06 lr: 1.7064e-06 eta: 1:03:48 time: 1.2332 data_time: 0.0084 memory: 16131 loss: 0.0165 tflops: 6.0837 tokens_per_sec: 100.5504 +2025/09/25 21:57:04 - mmengine - INFO - Iter(train) [4300/7452] base_lr: 1.7049e-06 lr: 1.7049e-06 eta: 1:03:35 time: 1.0081 data_time: 0.0084 memory: 16131 loss: 0.0093 tflops: 6.8410 tokens_per_sec: 113.0812 +2025/09/25 21:57:14 - mmengine - INFO - Iter(train) [4310/7452] base_lr: 1.7033e-06 lr: 1.7033e-06 eta: 1:03:22 time: 1.0132 data_time: 0.0087 memory: 16131 loss: 0.0181 tflops: 7.2851 tokens_per_sec: 120.4093 +2025/09/25 21:57:25 - mmengine - INFO - Iter(train) [4320/7452] base_lr: 1.7017e-06 lr: 1.7017e-06 eta: 1:03:09 time: 0.9960 data_time: 0.0087 memory: 16131 loss: 1.1797 tflops: 4.9783 tokens_per_sec: 82.3254 +2025/09/25 21:57:36 - mmengine - INFO - Iter(train) [4330/7452] base_lr: 1.7001e-06 lr: 1.7001e-06 eta: 1:02:56 time: 1.0173 data_time: 0.0086 memory: 16131 loss: 1.4766 tflops: 4.9340 tokens_per_sec: 81.5924 +2025/09/25 21:57:46 - mmengine - INFO - Iter(train) [4340/7452] base_lr: 1.6985e-06 lr: 1.6985e-06 eta: 1:02:43 time: 1.0085 data_time: 0.0086 memory: 16131 loss: 1.4453 tflops: 5.4571 tokens_per_sec: 90.2331 +2025/09/25 21:57:57 - mmengine - INFO - Iter(train) [4350/7452] base_lr: 1.6970e-06 lr: 1.6970e-06 eta: 1:02:30 time: 1.2023 data_time: 0.0083 memory: 16131 loss: 1.2969 tflops: 4.1242 tokens_per_sec: 68.2023 +2025/09/25 21:58:09 - mmengine - INFO - Iter(train) [4360/7452] base_lr: 1.6954e-06 lr: 1.6954e-06 eta: 1:02:17 time: 1.2666 data_time: 0.0084 
memory: 16132 loss: 2.1406 tflops: 8.8920 tokens_per_sec: 146.8441 +2025/09/25 21:58:19 - mmengine - INFO - Iter(train) [4370/7452] base_lr: 1.6938e-06 lr: 1.6938e-06 eta: 1:02:04 time: 1.0084 data_time: 0.0083 memory: 16131 loss: 0.0659 tflops: 5.7579 tokens_per_sec: 95.2006 +2025/09/25 21:58:30 - mmengine - INFO - Iter(train) [4380/7452] base_lr: 1.6922e-06 lr: 1.6922e-06 eta: 1:01:51 time: 1.0175 data_time: 0.0083 memory: 16131 loss: 1.0234 tflops: 4.2188 tokens_per_sec: 69.7755 +2025/09/25 21:58:41 - mmengine - INFO - Iter(train) [4390/7452] base_lr: 1.6906e-06 lr: 1.6906e-06 eta: 1:01:38 time: 1.2877 data_time: 0.0088 memory: 16130 loss: 0.1094 tflops: 4.6032 tokens_per_sec: 76.1068 +2025/09/25 21:58:51 - mmengine - INFO - Iter(train) [4400/7452] base_lr: 1.6890e-06 lr: 1.6890e-06 eta: 1:01:25 time: 1.0156 data_time: 0.0087 memory: 16130 loss: 1.0391 tflops: 4.2863 tokens_per_sec: 70.8922 +2025/09/25 21:59:02 - mmengine - INFO - Iter(train) [4410/7452] base_lr: 1.6873e-06 lr: 1.6873e-06 eta: 1:01:12 time: 1.2554 data_time: 0.0087 memory: 16130 loss: 0.1240 tflops: 5.2041 tokens_per_sec: 86.0298 +2025/09/25 21:59:14 - mmengine - INFO - Iter(train) [4420/7452] base_lr: 1.6857e-06 lr: 1.6857e-06 eta: 1:00:59 time: 1.0613 data_time: 0.0085 memory: 16130 loss: 0.0043 tflops: 6.9550 tokens_per_sec: 114.9532 +2025/09/25 21:59:24 - mmengine - INFO - Iter(train) [4430/7452] base_lr: 1.6841e-06 lr: 1.6841e-06 eta: 1:00:46 time: 1.2404 data_time: 0.0087 memory: 16131 loss: 2.4062 tflops: 4.5833 tokens_per_sec: 75.7826 +2025/09/25 21:59:35 - mmengine - INFO - Iter(train) [4440/7452] base_lr: 1.6825e-06 lr: 1.6825e-06 eta: 1:00:33 time: 1.2300 data_time: 0.0088 memory: 16131 loss: 2.4531 tflops: 3.0964 tokens_per_sec: 51.2186 +2025/09/25 21:59:46 - mmengine - INFO - Iter(train) [4450/7452] base_lr: 1.6809e-06 lr: 1.6809e-06 eta: 1:00:20 time: 1.0397 data_time: 0.0083 memory: 16131 loss: 0.1187 tflops: 7.3327 tokens_per_sec: 121.1892 +2025/09/25 21:59:57 - mmengine - INFO - 
Iter(train) [4460/7452] base_lr: 1.6792e-06 lr: 1.6792e-06 eta: 1:00:07 time: 1.0170 data_time: 0.0084 memory: 16131 loss: 1.6406 tflops: 6.1259 tokens_per_sec: 101.2748 +2025/09/25 22:00:08 - mmengine - INFO - Iter(train) [4470/7452] base_lr: 1.6776e-06 lr: 1.6776e-06 eta: 0:59:55 time: 1.0083 data_time: 0.0081 memory: 16130 loss: 0.0432 tflops: 7.4405 tokens_per_sec: 122.9748 +2025/09/25 22:00:19 - mmengine - INFO - Iter(train) [4480/7452] base_lr: 1.6760e-06 lr: 1.6760e-06 eta: 0:59:42 time: 1.2789 data_time: 0.0088 memory: 16131 loss: 0.0054 tflops: 5.6293 tokens_per_sec: 93.0460 +2025/09/25 22:00:29 - mmengine - INFO - Iter(train) [4490/7452] base_lr: 1.6743e-06 lr: 1.6743e-06 eta: 0:59:29 time: 1.2379 data_time: 0.0083 memory: 16131 loss: 2.2656 tflops: 6.3054 tokens_per_sec: 104.2074 +2025/09/25 22:00:40 - mmengine - INFO - Iter(train) [4500/7452] base_lr: 1.6727e-06 lr: 1.6727e-06 eta: 0:59:16 time: 1.0152 data_time: 0.0083 memory: 16131 loss: 0.0009 tflops: 7.6287 tokens_per_sec: 126.0781 +2025/09/25 22:00:50 - mmengine - INFO - Iter(train) [4510/7452] base_lr: 1.6711e-06 lr: 1.6711e-06 eta: 0:59:03 time: 1.0254 data_time: 0.0085 memory: 16130 loss: 1.9219 tflops: 6.7849 tokens_per_sec: 112.1525 +2025/09/25 22:01:01 - mmengine - INFO - Iter(train) [4520/7452] base_lr: 1.6694e-06 lr: 1.6694e-06 eta: 0:58:50 time: 1.2872 data_time: 0.0083 memory: 16131 loss: 2.4062 tflops: 3.5699 tokens_per_sec: 59.0406 +2025/09/25 22:01:12 - mmengine - INFO - Iter(train) [4530/7452] base_lr: 1.6678e-06 lr: 1.6678e-06 eta: 0:58:37 time: 1.2138 data_time: 0.0082 memory: 16131 loss: 0.0481 tflops: 5.0828 tokens_per_sec: 84.0320 +2025/09/25 22:01:23 - mmengine - INFO - Iter(train) [4540/7452] base_lr: 1.6661e-06 lr: 1.6661e-06 eta: 0:58:24 time: 1.0247 data_time: 0.0081 memory: 16130 loss: 1.8672 tflops: 4.3075 tokens_per_sec: 71.2419 +2025/09/25 22:01:34 - mmengine - INFO - Iter(train) [4550/7452] base_lr: 1.6645e-06 lr: 1.6645e-06 eta: 0:58:11 time: 1.0426 data_time: 0.0083 
memory: 16130 loss: 1.2422 tflops: 4.5237 tokens_per_sec: 74.8129 +2025/09/25 22:01:44 - mmengine - INFO - Iter(train) [4560/7452] base_lr: 1.6628e-06 lr: 1.6628e-06 eta: 0:57:59 time: 1.2300 data_time: 0.0083 memory: 16131 loss: 0.0693 tflops: 5.3606 tokens_per_sec: 88.6164 +2025/09/25 22:01:55 - mmengine - INFO - Iter(train) [4570/7452] base_lr: 1.6611e-06 lr: 1.6611e-06 eta: 0:57:46 time: 1.0092 data_time: 0.0085 memory: 16130 loss: 0.0549 tflops: 7.0136 tokens_per_sec: 115.9291 +2025/09/25 22:02:06 - mmengine - INFO - Iter(train) [4580/7452] base_lr: 1.6595e-06 lr: 1.6595e-06 eta: 0:57:33 time: 1.0416 data_time: 0.0090 memory: 16131 loss: 1.0938 tflops: 4.1795 tokens_per_sec: 69.1262 +2025/09/25 22:02:17 - mmengine - INFO - Iter(train) [4590/7452] base_lr: 1.6578e-06 lr: 1.6578e-06 eta: 0:57:20 time: 1.0020 data_time: 0.0074 memory: 16131 loss: 0.0043 tflops: 6.6408 tokens_per_sec: 109.7780 +2025/09/25 22:02:28 - mmengine - INFO - Iter(train) [4600/7452] base_lr: 1.6561e-06 lr: 1.6561e-06 eta: 0:57:08 time: 1.0172 data_time: 0.0084 memory: 16131 loss: 0.0179 tflops: 8.4486 tokens_per_sec: 139.6037 +2025/09/25 22:02:36 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 22:02:36 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:03:26 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is smooth and regular, with no evidence of atypia or architectural distortion. 
The luminal surface of the tumor cells is lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. 
The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells. The luminal surface of the tumor cells is not lined by a single layer of cuboidal or columnar cells + +2025/09/25 22:03:26 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:03:37 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the tissue sample is composed of a mixture of benign and malignant cells. The malignant cells are characterized by a high nuclear grade, indicating a more aggressive and potentially more dangerous nature of the cancer. The report also notes the presence of necrosis, which is a sign of tumor invasion and growth. Additionally, the report mentions the presence of lymphovascular invasion, suggesting that the cancer has the potential to spread to other parts of the body through the lymphatic system. The report concludes by stating that the tumor is classified as invasive ductal carcinoma, which is a type of breast cancer that begins in the milk ducts and invades surrounding breast tissue.<|im_end|> + +2025/09/25 22:03:37 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:03:38 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 22:04:11 - mmengine - INFO - Iter(train) [4610/7452] base_lr: 1.6544e-06 lr: 1.6544e-06 eta: 0:57:52 time: 1.0270 data_time: 0.0087 memory: 16130 loss: 0.0055 tflops: 7.4232 tokens_per_sec: 122.6852 +2025/09/25 22:04:22 - mmengine - INFO - Iter(train) [4620/7452] base_lr: 1.6528e-06 lr: 1.6528e-06 eta: 0:57:39 time: 1.2936 data_time: 0.0084 memory: 16131 loss: 0.0820 tflops: 5.4252 tokens_per_sec: 89.6753 +2025/09/25 22:04:33 - mmengine - INFO - Iter(train) [4630/7452] base_lr: 1.6511e-06 lr: 1.6511e-06 eta: 0:57:26 time: 1.0060 data_time: 0.0085 memory: 16131 loss: 0.0012 tflops: 6.1326 tokens_per_sec: 101.3871 +2025/09/25 22:04:44 - mmengine - INFO - Iter(train) [4640/7452] base_lr: 1.6494e-06 lr: 1.6494e-06 eta: 0:57:13 time: 1.0087 data_time: 0.0084 memory: 16131 loss: 0.0972 tflops: 6.9574 tokens_per_sec: 115.0017 +2025/09/25 22:04:55 - mmengine - INFO - Iter(train) [4650/7452] base_lr: 1.6477e-06 lr: 1.6477e-06 eta: 0:57:00 time: 0.9992 data_time: 0.0088 memory: 16131 loss: 0.1094 tflops: 7.2662 tokens_per_sec: 120.0994 +2025/09/25 22:05:06 - mmengine - INFO - Iter(train) [4660/7452] base_lr: 1.6460e-06 lr: 1.6460e-06 eta: 0:56:47 time: 1.0095 data_time: 0.0084 memory: 16131 loss: 1.9141 tflops: 7.0716 tokens_per_sec: 116.8867 +2025/09/25 22:05:16 - mmengine - INFO - Iter(train) [4670/7452] base_lr: 1.6443e-06 lr: 1.6443e-06 eta: 0:56:34 time: 1.2294 data_time: 0.0083 memory: 16130 loss: 0.0106 tflops: 5.5603 tokens_per_sec: 91.9126 +2025/09/25 22:05:27 - mmengine - INFO - Iter(train) [4680/7452] base_lr: 1.6426e-06 lr: 1.6426e-06 eta: 0:56:21 time: 1.0288 data_time: 0.0088 memory: 16131 loss: 1.9922 tflops: 8.2355 tokens_per_sec: 136.0857 +2025/09/25 22:05:38 - 
mmengine - INFO - Iter(train) [4690/7452] base_lr: 1.6409e-06 lr: 1.6409e-06 eta: 0:56:08 time: 1.2126 data_time: 0.0082 memory: 16132 loss: 2.0938 tflops: 8.7376 tokens_per_sec: 144.3144 +2025/09/25 22:05:48 - mmengine - INFO - Iter(train) [4700/7452] base_lr: 1.6392e-06 lr: 1.6392e-06 eta: 0:55:54 time: 0.9878 data_time: 0.0084 memory: 16131 loss: 0.0114 tflops: 6.6749 tokens_per_sec: 110.3436 +2025/09/25 22:05:59 - mmengine - INFO - Iter(train) [4710/7452] base_lr: 1.6375e-06 lr: 1.6375e-06 eta: 0:55:42 time: 1.3465 data_time: 0.0087 memory: 16131 loss: 2.3906 tflops: 3.5027 tokens_per_sec: 57.9263 +2025/09/25 22:06:10 - mmengine - INFO - Iter(train) [4720/7452] base_lr: 1.6358e-06 lr: 1.6358e-06 eta: 0:55:29 time: 1.0201 data_time: 0.0087 memory: 16131 loss: 2.2344 tflops: 7.7110 tokens_per_sec: 127.4349 +2025/09/25 22:06:21 - mmengine - INFO - Iter(train) [4730/7452] base_lr: 1.6341e-06 lr: 1.6341e-06 eta: 0:55:15 time: 1.0122 data_time: 0.0081 memory: 16131 loss: 0.0019 tflops: 7.4125 tokens_per_sec: 122.5111 +2025/09/25 22:06:32 - mmengine - INFO - Iter(train) [4740/7452] base_lr: 1.6324e-06 lr: 1.6324e-06 eta: 0:55:03 time: 1.0169 data_time: 0.0085 memory: 16130 loss: 0.0310 tflops: 6.3053 tokens_per_sec: 104.2366 +2025/09/25 22:06:43 - mmengine - INFO - Iter(train) [4750/7452] base_lr: 1.6307e-06 lr: 1.6307e-06 eta: 0:54:50 time: 1.2294 data_time: 0.0083 memory: 16131 loss: 0.2559 tflops: 5.6097 tokens_per_sec: 92.7282 +2025/09/25 22:06:53 - mmengine - INFO - Iter(train) [4760/7452] base_lr: 1.6289e-06 lr: 1.6289e-06 eta: 0:54:37 time: 1.0071 data_time: 0.0085 memory: 16131 loss: 0.0046 tflops: 7.0888 tokens_per_sec: 117.1708 +2025/09/25 22:07:05 - mmengine - INFO - Iter(train) [4770/7452] base_lr: 1.6272e-06 lr: 1.6272e-06 eta: 0:54:24 time: 1.0332 data_time: 0.0083 memory: 16131 loss: 0.0034 tflops: 6.4405 tokens_per_sec: 106.4659 +2025/09/25 22:07:15 - mmengine - INFO - Iter(train) [4780/7452] base_lr: 1.6255e-06 lr: 1.6255e-06 eta: 0:54:11 time: 1.0169 
data_time: 0.0089 memory: 16132 loss: 1.7031 tflops: 6.7823 tokens_per_sec: 112.1109 +2025/09/25 22:07:26 - mmengine - INFO - Iter(train) [4790/7452] base_lr: 1.6237e-06 lr: 1.6237e-06 eta: 0:53:58 time: 1.0357 data_time: 0.0087 memory: 16131 loss: 1.2891 tflops: 4.4372 tokens_per_sec: 73.3837 +2025/09/25 22:07:37 - mmengine - INFO - Iter(train) [4800/7452] base_lr: 1.6220e-06 lr: 1.6220e-06 eta: 0:53:45 time: 1.2789 data_time: 0.0086 memory: 16131 loss: 0.0048 tflops: 5.9138 tokens_per_sec: 97.7401 +2025/09/25 22:07:47 - mmengine - INFO - Iter(train) [4810/7452] base_lr: 1.6203e-06 lr: 1.6203e-06 eta: 0:53:32 time: 1.0277 data_time: 0.0084 memory: 16131 loss: 0.0952 tflops: 6.5930 tokens_per_sec: 108.9841 +2025/09/25 22:07:58 - mmengine - INFO - Iter(train) [4820/7452] base_lr: 1.6185e-06 lr: 1.6185e-06 eta: 0:53:19 time: 1.0440 data_time: 0.0086 memory: 16131 loss: 1.1797 tflops: 3.9380 tokens_per_sec: 65.1339 +2025/09/25 22:08:09 - mmengine - INFO - Iter(train) [4830/7452] base_lr: 1.6168e-06 lr: 1.6168e-06 eta: 0:53:06 time: 1.0057 data_time: 0.0090 memory: 16130 loss: 1.6953 tflops: 6.1344 tokens_per_sec: 101.4175 +2025/09/25 22:08:20 - mmengine - INFO - Iter(train) [4840/7452] base_lr: 1.6150e-06 lr: 1.6150e-06 eta: 0:52:53 time: 1.0164 data_time: 0.0086 memory: 16131 loss: 0.1943 tflops: 6.9645 tokens_per_sec: 115.1176 +2025/09/25 22:08:31 - mmengine - INFO - Iter(train) [4850/7452] base_lr: 1.6133e-06 lr: 1.6133e-06 eta: 0:52:41 time: 1.2629 data_time: 0.0080 memory: 16131 loss: 0.0004 tflops: 5.0292 tokens_per_sec: 83.1429 +2025/09/25 22:08:41 - mmengine - INFO - Iter(train) [4860/7452] base_lr: 1.6115e-06 lr: 1.6115e-06 eta: 0:52:28 time: 1.2269 data_time: 0.0088 memory: 16130 loss: 1.7891 tflops: 3.7949 tokens_per_sec: 62.7595 +2025/09/25 22:08:52 - mmengine - INFO - Iter(train) [4870/7452] base_lr: 1.6098e-06 lr: 1.6098e-06 eta: 0:52:15 time: 1.0040 data_time: 0.0085 memory: 16132 loss: 0.0040 tflops: 7.1104 tokens_per_sec: 117.5287 +2025/09/25 22:09:03 
- mmengine - INFO - Iter(train) [4880/7452] base_lr: 1.6080e-06 lr: 1.6080e-06 eta: 0:52:02 time: 1.0337 data_time: 0.0087 memory: 16130 loss: 1.8516 tflops: 4.6211 tokens_per_sec: 76.4226 +2025/09/25 22:09:13 - mmengine - INFO - Iter(train) [4890/7452] base_lr: 1.6062e-06 lr: 1.6062e-06 eta: 0:51:49 time: 1.0330 data_time: 0.0083 memory: 16131 loss: 0.0272 tflops: 7.3217 tokens_per_sec: 121.0101 +2025/09/25 22:09:24 - mmengine - INFO - Iter(train) [4900/7452] base_lr: 1.6045e-06 lr: 1.6045e-06 eta: 0:51:36 time: 1.0013 data_time: 0.0087 memory: 16131 loss: 0.0024 tflops: 8.2800 tokens_per_sec: 136.8266 +2025/09/25 22:09:35 - mmengine - INFO - Iter(train) [4910/7452] base_lr: 1.6027e-06 lr: 1.6027e-06 eta: 0:51:23 time: 1.0119 data_time: 0.0094 memory: 16131 loss: 1.5000 tflops: 3.8837 tokens_per_sec: 64.2384 +2025/09/25 22:09:46 - mmengine - INFO - Iter(train) [4920/7452] base_lr: 1.6009e-06 lr: 1.6009e-06 eta: 0:51:10 time: 1.2796 data_time: 0.0090 memory: 16131 loss: 0.0557 tflops: 5.2002 tokens_per_sec: 85.9641 +2025/09/25 22:09:57 - mmengine - INFO - Iter(train) [4930/7452] base_lr: 1.5992e-06 lr: 1.5992e-06 eta: 0:50:58 time: 0.9901 data_time: 0.0087 memory: 16130 loss: 1.8672 tflops: 6.1089 tokens_per_sec: 100.9992 +2025/09/25 22:10:08 - mmengine - INFO - Iter(train) [4940/7452] base_lr: 1.5974e-06 lr: 1.5974e-06 eta: 0:50:45 time: 1.2480 data_time: 0.0082 memory: 16131 loss: 1.9609 tflops: 7.5660 tokens_per_sec: 124.9963 +2025/09/25 22:10:19 - mmengine - INFO - Iter(train) [4950/7452] base_lr: 1.5956e-06 lr: 1.5956e-06 eta: 0:50:32 time: 1.2269 data_time: 0.0085 memory: 16131 loss: 0.0718 tflops: 5.3247 tokens_per_sec: 88.0238 +2025/09/25 22:10:29 - mmengine - INFO - Iter(train) [4960/7452] base_lr: 1.5938e-06 lr: 1.5938e-06 eta: 0:50:19 time: 1.0135 data_time: 0.0092 memory: 16131 loss: 1.8125 tflops: 6.4458 tokens_per_sec: 106.5574 +2025/09/25 22:10:40 - mmengine - INFO - Iter(train) [4970/7452] base_lr: 1.5920e-06 lr: 1.5920e-06 eta: 0:50:07 time: 1.0145 
data_time: 0.0089 memory: 16131 loss: 0.0398 tflops: 7.2159 tokens_per_sec: 119.2680 +2025/09/25 22:10:51 - mmengine - INFO - Iter(train) [4980/7452] base_lr: 1.5902e-06 lr: 1.5902e-06 eta: 0:49:54 time: 1.0384 data_time: 0.0085 memory: 16130 loss: 0.0591 tflops: 6.0579 tokens_per_sec: 100.1504 +2025/09/25 22:11:01 - mmengine - INFO - Iter(train) [4990/7452] base_lr: 1.5884e-06 lr: 1.5884e-06 eta: 0:49:41 time: 1.0359 data_time: 0.0088 memory: 16131 loss: 0.0006 tflops: 9.1743 tokens_per_sec: 151.5644 +2025/09/25 22:11:12 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 22:11:12 - mmengine - INFO - Iter(train) [5000/7452] base_lr: 1.5867e-06 lr: 1.5867e-06 eta: 0:49:28 time: 1.0357 data_time: 0.0085 memory: 16130 loss: 1.6719 tflops: 3.4436 tokens_per_sec: 56.9648 +2025/09/25 22:11:23 - mmengine - INFO - Iter(train) [5010/7452] base_lr: 1.5849e-06 lr: 1.5849e-06 eta: 0:49:16 time: 1.0357 data_time: 0.0087 memory: 16130 loss: 0.0396 tflops: 6.3664 tokens_per_sec: 105.2438 +2025/09/25 22:11:34 - mmengine - INFO - Iter(train) [5020/7452] base_lr: 1.5831e-06 lr: 1.5831e-06 eta: 0:49:03 time: 1.0139 data_time: 0.0093 memory: 16130 loss: 2.0781 tflops: 4.2340 tokens_per_sec: 70.0271 +2025/09/25 22:11:45 - mmengine - INFO - Iter(train) [5030/7452] base_lr: 1.5813e-06 lr: 1.5813e-06 eta: 0:48:50 time: 1.0105 data_time: 0.0086 memory: 16131 loss: 0.0037 tflops: 6.7050 tokens_per_sec: 110.8356 +2025/09/25 22:11:56 - mmengine - INFO - Iter(train) [5040/7452] base_lr: 1.5794e-06 lr: 1.5794e-06 eta: 0:48:37 time: 1.0456 data_time: 0.0082 memory: 16130 loss: 0.0540 tflops: 6.4222 tokens_per_sec: 106.1623 +2025/09/25 22:12:07 - mmengine - INFO - Iter(train) [5050/7452] base_lr: 1.5776e-06 lr: 1.5776e-06 eta: 0:48:25 time: 1.2640 data_time: 0.0089 memory: 16131 loss: 0.0522 tflops: 7.5667 tokens_per_sec: 125.0042 +2025/09/25 22:12:17 - mmengine - INFO - Iter(train) [5060/7452] base_lr: 1.5758e-06 lr: 1.5758e-06 eta: 0:48:12 time: 1.0002 data_time: 
0.0087 memory: 16131 loss: 0.0018 tflops: 7.5614 tokens_per_sec: 124.9716 +2025/09/25 22:12:28 - mmengine - INFO - Iter(train) [5070/7452] base_lr: 1.5740e-06 lr: 1.5740e-06 eta: 0:47:59 time: 1.1108 data_time: 0.0084 memory: 16131 loss: 0.0004 tflops: 7.7912 tokens_per_sec: 128.7390 +2025/09/25 22:12:40 - mmengine - INFO - Iter(train) [5080/7452] base_lr: 1.5722e-06 lr: 1.5722e-06 eta: 0:47:47 time: 1.0198 data_time: 0.0086 memory: 16130 loss: 0.0009 tflops: 7.5948 tokens_per_sec: 125.5189 +2025/09/25 22:12:50 - mmengine - INFO - Iter(train) [5090/7452] base_lr: 1.5704e-06 lr: 1.5704e-06 eta: 0:47:34 time: 1.0162 data_time: 0.0085 memory: 16131 loss: 0.0352 tflops: 9.5904 tokens_per_sec: 158.4294 +2025/09/25 22:13:01 - mmengine - INFO - Iter(train) [5100/7452] base_lr: 1.5685e-06 lr: 1.5685e-06 eta: 0:47:21 time: 1.0181 data_time: 0.0081 memory: 16130 loss: 1.7578 tflops: 4.3947 tokens_per_sec: 72.6832 +2025/09/25 22:13:12 - mmengine - INFO - Iter(train) [5110/7452] base_lr: 1.5667e-06 lr: 1.5667e-06 eta: 0:47:09 time: 1.2327 data_time: 0.0087 memory: 16130 loss: 1.8672 tflops: 3.8260 tokens_per_sec: 63.2734 +2025/09/25 22:13:23 - mmengine - INFO - Iter(train) [5120/7452] base_lr: 1.5649e-06 lr: 1.5649e-06 eta: 0:46:56 time: 1.2964 data_time: 0.0086 memory: 16130 loss: 0.0481 tflops: 5.0395 tokens_per_sec: 83.3091 +2025/09/25 22:13:23 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 22:13:23 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:13:31 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. 
The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is smooth and regular, with no evidence of irregularity or distortion. The tumor cells are organized in a lobulated pattern, with the luminal surface facing towards the center of the lobule.<|im_end|> + +2025/09/25 22:13:31 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:13:46 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the tumor is characterized by a high-grade, invasive adenocarcinoma. The tumor cells are described as exhibiting marked pleomorphism, with irregular nuclear contours and prominent nucleoli. The tumor is noted to be predominantly solid, with areas of glandular and cribriform differentiation. The tumor cells are arranged in sheets and cords, with areas of necrosis and hemorrhage observed. The tumor is also noted to have areas of necrosis and hemorrhage. The tumor cells are positive for estrogen receptor (ER) and progesterone receptor (PR), and negative for HER2/neu. The tumor is classified as T3, N1, M0, indicating that it has invaded the endometrium and cervical stroma, with lymph node metastasis. The report also mentions the presence of a focal area of endometrial hyperplasia.<|im_end|> + +2025/09/25 22:13:46 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:13:47 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 22:14:42 - mmengine - INFO - Iter(train) [5130/7452] base_lr: 1.5631e-06 lr: 1.5631e-06 eta: 0:47:15 time: 1.0081 data_time: 0.0084 memory: 16130 loss: 0.0095 tflops: 6.5410 tokens_per_sec: 108.1288 +2025/09/25 22:14:53 - mmengine - INFO - Iter(train) [5140/7452] base_lr: 1.5612e-06 lr: 1.5612e-06 eta: 0:47:02 time: 1.2506 data_time: 0.0088 memory: 16130 loss: 0.0006 tflops: 6.3383 tokens_per_sec: 104.7483 +2025/09/25 22:15:04 - mmengine - INFO - Iter(train) [5150/7452] base_lr: 1.5594e-06 lr: 1.5594e-06 eta: 0:46:49 time: 1.0358 data_time: 0.0086 memory: 16131 loss: 0.0854 tflops: 7.0092 tokens_per_sec: 115.8524 +2025/09/25 22:15:15 - mmengine - INFO - Iter(train) [5160/7452] base_lr: 1.5575e-06 lr: 1.5575e-06 eta: 0:46:36 time: 1.0018 data_time: 0.0097 memory: 16132 loss: 1.2578 tflops: 4.2245 tokens_per_sec: 69.8714 +2025/09/25 22:15:26 - mmengine - INFO - Iter(train) [5170/7452] base_lr: 1.5557e-06 lr: 1.5557e-06 eta: 0:46:23 time: 1.2245 data_time: 0.0091 memory: 16131 loss: 2.3438 tflops: 4.3461 tokens_per_sec: 71.8653 +2025/09/25 22:15:36 - mmengine - INFO - Iter(train) [5180/7452] base_lr: 1.5539e-06 lr: 1.5539e-06 eta: 0:46:11 time: 1.0226 data_time: 0.0098 memory: 16132 loss: 0.0125 tflops: 7.5740 tokens_per_sec: 125.1744 +2025/09/25 22:15:47 - mmengine - INFO - Iter(train) [5190/7452] base_lr: 1.5520e-06 lr: 1.5520e-06 eta: 0:45:58 time: 1.0443 data_time: 0.0101 memory: 16131 loss: 0.0038 tflops: 8.6356 tokens_per_sec: 142.6788 +2025/09/25 22:15:58 - mmengine - INFO - Iter(train) [5200/7452] base_lr: 1.5502e-06 lr: 1.5502e-06 eta: 0:45:45 time: 1.2280 data_time: 0.0090 memory: 16131 loss: 1.6562 tflops: 3.4464 tokens_per_sec: 57.0016 +2025/09/25 22:16:09 - 
mmengine - INFO - Iter(train) [5210/7452] base_lr: 1.5483e-06 lr: 1.5483e-06 eta: 0:45:32 time: 1.0629 data_time: 0.0089 memory: 16131 loss: 0.0005 tflops: 5.9753 tokens_per_sec: 98.7838 +2025/09/25 22:16:19 - mmengine - INFO - Iter(train) [5220/7452] base_lr: 1.5464e-06 lr: 1.5464e-06 eta: 0:45:19 time: 1.0285 data_time: 0.0086 memory: 16131 loss: 2.0000 tflops: 7.6480 tokens_per_sec: 126.3948 +2025/09/25 22:16:30 - mmengine - INFO - Iter(train) [5230/7452] base_lr: 1.5446e-06 lr: 1.5446e-06 eta: 0:45:07 time: 1.2355 data_time: 0.0084 memory: 16131 loss: 0.1387 tflops: 6.2689 tokens_per_sec: 103.6045 +2025/09/25 22:16:41 - mmengine - INFO - Iter(train) [5240/7452] base_lr: 1.5427e-06 lr: 1.5427e-06 eta: 0:44:54 time: 1.0465 data_time: 0.0081 memory: 16131 loss: 2.3125 tflops: 4.5068 tokens_per_sec: 74.5335 +2025/09/25 22:16:52 - mmengine - INFO - Iter(train) [5250/7452] base_lr: 1.5409e-06 lr: 1.5409e-06 eta: 0:44:41 time: 1.0175 data_time: 0.0086 memory: 16131 loss: 1.3281 tflops: 5.0517 tokens_per_sec: 83.5371 +2025/09/25 22:17:03 - mmengine - INFO - Iter(train) [5260/7452] base_lr: 1.5390e-06 lr: 1.5390e-06 eta: 0:44:29 time: 1.0403 data_time: 0.0090 memory: 16131 loss: 0.0024 tflops: 7.7948 tokens_per_sec: 128.8136 +2025/09/25 22:17:14 - mmengine - INFO - Iter(train) [5270/7452] base_lr: 1.5371e-06 lr: 1.5371e-06 eta: 0:44:16 time: 1.0374 data_time: 0.0087 memory: 16131 loss: 1.4453 tflops: 3.9046 tokens_per_sec: 64.5824 +2025/09/25 22:17:24 - mmengine - INFO - Iter(train) [5280/7452] base_lr: 1.5352e-06 lr: 1.5352e-06 eta: 0:44:03 time: 1.0380 data_time: 0.0085 memory: 16131 loss: 2.3125 tflops: 6.6441 tokens_per_sec: 109.8266 +2025/09/25 22:17:35 - mmengine - INFO - Iter(train) [5290/7452] base_lr: 1.5334e-06 lr: 1.5334e-06 eta: 0:43:50 time: 1.0385 data_time: 0.0086 memory: 16131 loss: 2.0625 tflops: 8.3332 tokens_per_sec: 137.6939 +2025/09/25 22:17:46 - mmengine - INFO - Iter(train) [5300/7452] base_lr: 1.5315e-06 lr: 1.5315e-06 eta: 0:43:37 time: 1.1400 
data_time: 0.0085 memory: 16131 loss: 1.7969 tflops: 4.6683 tokens_per_sec: 77.1928 +2025/09/25 22:17:57 - mmengine - INFO - Iter(train) [5310/7452] base_lr: 1.5296e-06 lr: 1.5296e-06 eta: 0:43:25 time: 1.0630 data_time: 0.0089 memory: 16131 loss: 0.0004 tflops: 6.7727 tokens_per_sec: 111.9452 +2025/09/25 22:18:08 - mmengine - INFO - Iter(train) [5320/7452] base_lr: 1.5277e-06 lr: 1.5277e-06 eta: 0:43:12 time: 1.3037 data_time: 0.0082 memory: 16131 loss: 0.0188 tflops: 5.7550 tokens_per_sec: 95.1169 +2025/09/25 22:18:19 - mmengine - INFO - Iter(train) [5330/7452] base_lr: 1.5258e-06 lr: 1.5258e-06 eta: 0:43:00 time: 1.0527 data_time: 0.0090 memory: 16131 loss: 0.0002 tflops: 7.8181 tokens_per_sec: 129.1953 +2025/09/25 22:18:30 - mmengine - INFO - Iter(train) [5340/7452] base_lr: 1.5239e-06 lr: 1.5239e-06 eta: 0:42:47 time: 1.0333 data_time: 0.0083 memory: 16131 loss: 1.1406 tflops: 5.6778 tokens_per_sec: 93.8752 +2025/09/25 22:18:41 - mmengine - INFO - Iter(train) [5350/7452] base_lr: 1.5221e-06 lr: 1.5221e-06 eta: 0:42:34 time: 1.2319 data_time: 0.0085 memory: 16131 loss: 0.0645 tflops: 5.4510 tokens_per_sec: 90.1080 +2025/09/25 22:18:52 - mmengine - INFO - Iter(train) [5360/7452] base_lr: 1.5202e-06 lr: 1.5202e-06 eta: 0:42:22 time: 1.0335 data_time: 0.0089 memory: 16131 loss: 1.9844 tflops: 5.3835 tokens_per_sec: 89.0157 +2025/09/25 22:19:02 - mmengine - INFO - Iter(train) [5370/7452] base_lr: 1.5183e-06 lr: 1.5183e-06 eta: 0:42:09 time: 1.0283 data_time: 0.0087 memory: 16131 loss: 2.3438 tflops: 5.9999 tokens_per_sec: 99.1942 +2025/09/25 22:19:14 - mmengine - INFO - Iter(train) [5380/7452] base_lr: 1.5164e-06 lr: 1.5164e-06 eta: 0:41:56 time: 1.1360 data_time: 0.0088 memory: 16132 loss: 1.0781 tflops: 4.8981 tokens_per_sec: 80.9889 +2025/09/25 22:19:25 - mmengine - INFO - Iter(train) [5390/7452] base_lr: 1.5145e-06 lr: 1.5145e-06 eta: 0:41:44 time: 1.0400 data_time: 0.0087 memory: 16130 loss: 0.1514 tflops: 6.6899 tokens_per_sec: 110.5818 +2025/09/25 22:19:35 - 
mmengine - INFO - Iter(train) [5400/7452] base_lr: 1.5126e-06 lr: 1.5126e-06 eta: 0:41:31 time: 1.0065 data_time: 0.0086 memory: 16131 loss: 0.0008 tflops: 6.7922 tokens_per_sec: 112.2754 +2025/09/25 22:19:46 - mmengine - INFO - Iter(train) [5410/7452] base_lr: 1.5107e-06 lr: 1.5107e-06 eta: 0:41:19 time: 1.2250 data_time: 0.0074 memory: 16131 loss: 0.9844 tflops: 3.8996 tokens_per_sec: 64.4895 +2025/09/25 22:19:57 - mmengine - INFO - Iter(train) [5420/7452] base_lr: 1.5087e-06 lr: 1.5087e-06 eta: 0:41:06 time: 1.2501 data_time: 0.0085 memory: 16131 loss: 0.0015 tflops: 5.8563 tokens_per_sec: 96.7958 +2025/09/25 22:20:08 - mmengine - INFO - Iter(train) [5430/7452] base_lr: 1.5068e-06 lr: 1.5068e-06 eta: 0:40:53 time: 1.2593 data_time: 0.0124 memory: 16130 loss: 0.0187 tflops: 5.4283 tokens_per_sec: 89.7312 +2025/09/25 22:20:20 - mmengine - INFO - Iter(train) [5440/7452] base_lr: 1.5049e-06 lr: 1.5049e-06 eta: 0:40:41 time: 1.6278 data_time: 0.0091 memory: 16131 loss: 0.0080 tflops: 4.4600 tokens_per_sec: 73.7173 +2025/09/25 22:20:30 - mmengine - INFO - Iter(train) [5450/7452] base_lr: 1.5030e-06 lr: 1.5030e-06 eta: 0:40:28 time: 1.2191 data_time: 0.0083 memory: 16131 loss: 2.2500 tflops: 3.9185 tokens_per_sec: 64.8018 +2025/09/25 22:20:41 - mmengine - INFO - Iter(train) [5460/7452] base_lr: 1.5011e-06 lr: 1.5011e-06 eta: 0:40:15 time: 1.0273 data_time: 0.0089 memory: 16131 loss: 1.3672 tflops: 5.0627 tokens_per_sec: 83.7175 +2025/09/25 22:20:52 - mmengine - INFO - Iter(train) [5470/7452] base_lr: 1.4992e-06 lr: 1.4992e-06 eta: 0:40:03 time: 1.0350 data_time: 0.0091 memory: 16131 loss: 1.2656 tflops: 4.4398 tokens_per_sec: 73.4268 +2025/09/25 22:21:02 - mmengine - INFO - Iter(train) [5480/7452] base_lr: 1.4972e-06 lr: 1.4972e-06 eta: 0:39:50 time: 1.0479 data_time: 0.0092 memory: 16131 loss: 0.0049 tflops: 6.9863 tokens_per_sec: 115.4723 +2025/09/25 22:21:13 - mmengine - INFO - Iter(train) [5490/7452] base_lr: 1.4953e-06 lr: 1.4953e-06 eta: 0:39:38 time: 1.0245 
data_time: 0.0088 memory: 16131 loss: 0.0129 tflops: 6.5545 tokens_per_sec: 108.3500 +2025/09/25 22:21:24 - mmengine - INFO - Iter(train) [5500/7452] base_lr: 1.4934e-06 lr: 1.4934e-06 eta: 0:39:25 time: 1.0409 data_time: 0.0085 memory: 16131 loss: 0.0835 tflops: 6.9752 tokens_per_sec: 115.2897 +2025/09/25 22:21:35 - mmengine - INFO - Iter(train) [5510/7452] base_lr: 1.4915e-06 lr: 1.4915e-06 eta: 0:39:13 time: 1.0046 data_time: 0.0090 memory: 16131 loss: 0.0002 tflops: 8.4942 tokens_per_sec: 140.3578 +2025/09/25 22:21:46 - mmengine - INFO - Iter(train) [5520/7452] base_lr: 1.4895e-06 lr: 1.4895e-06 eta: 0:39:00 time: 1.0221 data_time: 0.0082 memory: 16131 loss: 0.0007 tflops: 8.4673 tokens_per_sec: 139.9105 +2025/09/25 22:21:57 - mmengine - INFO - Iter(train) [5530/7452] base_lr: 1.4876e-06 lr: 1.4876e-06 eta: 0:38:48 time: 1.0240 data_time: 0.0086 memory: 16131 loss: 2.5312 tflops: 6.9715 tokens_per_sec: 115.2323 +2025/09/25 22:22:08 - mmengine - INFO - Iter(train) [5540/7452] base_lr: 1.4856e-06 lr: 1.4856e-06 eta: 0:38:35 time: 1.0308 data_time: 0.0083 memory: 16131 loss: 0.1641 tflops: 6.9845 tokens_per_sec: 115.4463 +2025/09/25 22:22:19 - mmengine - INFO - Iter(train) [5550/7452] base_lr: 1.4837e-06 lr: 1.4837e-06 eta: 0:38:23 time: 1.2764 data_time: 0.0087 memory: 16131 loss: 0.0119 tflops: 5.0711 tokens_per_sec: 83.8324 +2025/09/25 22:22:30 - mmengine - INFO - Iter(train) [5560/7452] base_lr: 1.4818e-06 lr: 1.4818e-06 eta: 0:38:10 time: 1.0206 data_time: 0.0089 memory: 16131 loss: 0.0175 tflops: 7.1136 tokens_per_sec: 117.5782 +2025/09/25 22:22:41 - mmengine - INFO - Iter(train) [5570/7452] base_lr: 1.4798e-06 lr: 1.4798e-06 eta: 0:37:58 time: 1.0240 data_time: 0.0083 memory: 16131 loss: 0.0212 tflops: 5.8476 tokens_per_sec: 96.6790 +2025/09/25 22:22:52 - mmengine - INFO - Iter(train) [5580/7452] base_lr: 1.4779e-06 lr: 1.4779e-06 eta: 0:37:45 time: 1.0046 data_time: 0.0084 memory: 16130 loss: 0.0923 tflops: 5.9003 tokens_per_sec: 97.5529 +2025/09/25 
22:23:02 - mmengine - INFO - Iter(train) [5590/7452] base_lr: 1.4759e-06 lr: 1.4759e-06 eta: 0:37:32 time: 1.0130 data_time: 0.0084 memory: 16131 loss: 1.7891 tflops: 7.3463 tokens_per_sec: 121.4193 +2025/09/25 22:23:13 - mmengine - INFO - Iter(train) [5600/7452] base_lr: 1.4740e-06 lr: 1.4740e-06 eta: 0:37:20 time: 1.2256 data_time: 0.0083 memory: 16131 loss: 0.0210 tflops: 5.2813 tokens_per_sec: 87.3076 +2025/09/25 22:23:24 - mmengine - INFO - Iter(train) [5610/7452] base_lr: 1.4720e-06 lr: 1.4720e-06 eta: 0:37:07 time: 1.0496 data_time: 0.0087 memory: 16131 loss: 0.0547 tflops: 6.8013 tokens_per_sec: 112.4194 +2025/09/25 22:23:35 - mmengine - INFO - Iter(train) [5620/7452] base_lr: 1.4701e-06 lr: 1.4701e-06 eta: 0:36:55 time: 1.0052 data_time: 0.0086 memory: 16131 loss: 0.0007 tflops: 8.4891 tokens_per_sec: 140.2738 +2025/09/25 22:23:46 - mmengine - INFO - Iter(train) [5630/7452] base_lr: 1.4681e-06 lr: 1.4681e-06 eta: 0:36:42 time: 1.0311 data_time: 0.0083 memory: 16131 loss: 2.4062 tflops: 7.0413 tokens_per_sec: 116.3835 +2025/09/25 22:23:48 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 22:23:48 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:23:58 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is smooth and regular, with no evidence of irregularity or distortion. The tumor cells are organized in a radial pattern, with the luminal surface facing towards the center of the lobule. 
The luminal surface of the tumor cells is smooth and regular, with no evidence of irregularity or distortion.<|im_end|> + +2025/09/25 22:23:58 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:24:47 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the tumor is a well-differentiated papillary renal cell carcinoma, which is a type of renal cell carcinoma. The tumor is characterized by the presence of papillary structures, which are lined by cuboidal or columnar cells with distinct cell borders. The tumor cells exhibit nuclear atypia, which is a feature of malignancy. Additionally, the report notes the presence of necrosis, which is a sign of aggressive tumor behavior. The tumor is also described as multifocal, indicating that it is distributed in multiple areas within the kidney. The report also mentions the presence of hemorrhage, which can be a sign of tumor invasion or growth. The tumor is described as infiltrating the renal parenchyma, which suggests that it has invaded the surrounding tissue. The report also notes the presence of tumor thrombi, which are blood clots within the renal veins that can indicate tumor spread. The tumor is also described as invading the renal vein, which is a sign of tumor progression. The report also mentions the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report also mentions the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. 
The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. The report notes the presence of tumor cells in the renal pelvis, which can indicate tumor spread to the collecting system. 
The report notes the presence of tumor cells in the renal + +2025/09/25 22:24:47 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:24:48 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 22:24:57 - mmengine - INFO - Iter(train) [5640/7452] base_lr: 1.4661e-06 lr: 1.4661e-06 eta: 0:36:50 time: 1.2960 data_time: 0.0084 memory: 16130 loss: 0.0089 tflops: 5.5550 tokens_per_sec: 91.8182 +2025/09/25 22:25:08 - mmengine - INFO - Iter(train) [5650/7452] base_lr: 1.4642e-06 lr: 1.4642e-06 eta: 0:36:37 time: 1.0333 data_time: 0.0086 memory: 16131 loss: 1.3516 tflops: 3.8029 tokens_per_sec: 62.9023 +2025/09/25 22:25:19 - mmengine - INFO - Iter(train) [5660/7452] base_lr: 1.4622e-06 lr: 1.4622e-06 eta: 0:36:24 time: 1.0214 data_time: 0.0081 memory: 16130 loss: 0.0270 tflops: 7.2269 tokens_per_sec: 119.4471 +2025/09/25 22:25:30 - mmengine - INFO - Iter(train) [5670/7452] base_lr: 1.4602e-06 lr: 1.4602e-06 eta: 0:36:12 time: 1.3173 data_time: 0.0084 memory: 16130 loss: 0.0010 tflops: 5.0056 tokens_per_sec: 82.7473 +2025/09/25 22:25:41 - mmengine - INFO - Iter(train) [5680/7452] base_lr: 1.4583e-06 lr: 1.4583e-06 eta: 0:35:59 time: 1.0095 data_time: 0.0082 memory: 16130 loss: 0.0155 tflops: 7.2520 tokens_per_sec: 119.8647 +2025/09/25 22:25:51 - mmengine - INFO - Iter(train) [5690/7452] base_lr: 1.4563e-06 lr: 1.4563e-06 eta: 0:35:46 time: 1.2973 data_time: 0.0083 memory: 16131 loss: 0.0074 tflops: 5.5965 tokens_per_sec: 92.5026 +2025/09/25 
22:26:03 - mmengine - INFO - Iter(train) [5700/7452] base_lr: 1.4543e-06 lr: 1.4543e-06 eta: 0:35:34 time: 1.3423 data_time: 0.0084 memory: 16131 loss: 0.0105 tflops: 4.8671 tokens_per_sec: 80.4593 +2025/09/25 22:26:13 - mmengine - INFO - Iter(train) [5710/7452] base_lr: 1.4523e-06 lr: 1.4523e-06 eta: 0:35:21 time: 1.2345 data_time: 0.0084 memory: 16130 loss: 0.0640 tflops: 7.1088 tokens_per_sec: 117.4602 +2025/09/25 22:26:23 - mmengine - INFO - Iter(train) [5720/7452] base_lr: 1.4504e-06 lr: 1.4504e-06 eta: 0:35:08 time: 1.0437 data_time: 0.0088 memory: 16131 loss: 0.0029 tflops: 7.4204 tokens_per_sec: 122.6355 +2025/09/25 22:26:34 - mmengine - INFO - Iter(train) [5730/7452] base_lr: 1.4484e-06 lr: 1.4484e-06 eta: 0:34:56 time: 1.0346 data_time: 0.0081 memory: 16130 loss: 1.4453 tflops: 5.8464 tokens_per_sec: 96.6593 +2025/09/25 22:26:45 - mmengine - INFO - Iter(train) [5740/7452] base_lr: 1.4464e-06 lr: 1.4464e-06 eta: 0:34:43 time: 1.0518 data_time: 0.0086 memory: 16131 loss: 1.7891 tflops: 4.0240 tokens_per_sec: 66.5548 +2025/09/25 22:26:55 - mmengine - INFO - Iter(train) [5750/7452] base_lr: 1.4444e-06 lr: 1.4444e-06 eta: 0:34:31 time: 1.0454 data_time: 0.0088 memory: 16131 loss: 2.1250 tflops: 5.7278 tokens_per_sec: 94.6984 +2025/09/25 22:27:07 - mmengine - INFO - Iter(train) [5760/7452] base_lr: 1.4424e-06 lr: 1.4424e-06 eta: 0:34:18 time: 1.2557 data_time: 0.0085 memory: 16131 loss: 2.1406 tflops: 3.2258 tokens_per_sec: 53.3563 +2025/09/25 22:27:18 - mmengine - INFO - Iter(train) [5770/7452] base_lr: 1.4404e-06 lr: 1.4404e-06 eta: 0:34:06 time: 1.3287 data_time: 0.0084 memory: 16130 loss: 0.0488 tflops: 5.1903 tokens_per_sec: 85.7960 +2025/09/25 22:27:28 - mmengine - INFO - Iter(train) [5780/7452] base_lr: 1.4384e-06 lr: 1.4384e-06 eta: 0:33:53 time: 1.2527 data_time: 0.0084 memory: 16131 loss: 0.0703 tflops: 4.6834 tokens_per_sec: 77.4334 +2025/09/25 22:27:39 - mmengine - INFO - Iter(train) [5790/7452] base_lr: 1.4364e-06 lr: 1.4364e-06 eta: 0:33:40 time: 
1.0187 data_time: 0.0084 memory: 16130 loss: 0.0398 tflops: 6.4728 tokens_per_sec: 107.0027 +2025/09/25 22:27:49 - mmengine - INFO - Iter(train) [5800/7452] base_lr: 1.4344e-06 lr: 1.4344e-06 eta: 0:33:28 time: 1.0000 data_time: 0.0082 memory: 16131 loss: 1.8906 tflops: 5.1403 tokens_per_sec: 85.0018 +2025/09/25 22:28:00 - mmengine - INFO - Iter(train) [5810/7452] base_lr: 1.4324e-06 lr: 1.4324e-06 eta: 0:33:15 time: 1.0048 data_time: 0.0086 memory: 16130 loss: 2.2656 tflops: 4.8146 tokens_per_sec: 79.6210 +2025/09/25 22:28:10 - mmengine - INFO - Iter(train) [5820/7452] base_lr: 1.4304e-06 lr: 1.4304e-06 eta: 0:33:03 time: 1.0123 data_time: 0.0080 memory: 16131 loss: 0.0972 tflops: 6.9323 tokens_per_sec: 114.5871 +2025/09/25 22:28:22 - mmengine - INFO - Iter(train) [5830/7452] base_lr: 1.4284e-06 lr: 1.4284e-06 eta: 0:32:50 time: 1.0133 data_time: 0.0091 memory: 16131 loss: 0.0142 tflops: 6.4474 tokens_per_sec: 106.5834 +2025/09/25 22:28:33 - mmengine - INFO - Iter(train) [5840/7452] base_lr: 1.4264e-06 lr: 1.4264e-06 eta: 0:32:38 time: 1.0182 data_time: 0.0085 memory: 16130 loss: 0.0359 tflops: 7.2497 tokens_per_sec: 119.8237 +2025/09/25 22:28:44 - mmengine - INFO - Iter(train) [5850/7452] base_lr: 1.4244e-06 lr: 1.4244e-06 eta: 0:32:25 time: 1.0046 data_time: 0.0083 memory: 16130 loss: 0.0055 tflops: 6.2016 tokens_per_sec: 102.5275 +2025/09/25 22:28:54 - mmengine - INFO - Iter(train) [5860/7452] base_lr: 1.4224e-06 lr: 1.4224e-06 eta: 0:32:13 time: 1.1968 data_time: 0.0084 memory: 16130 loss: 1.6953 tflops: 4.0926 tokens_per_sec: 67.6803 +2025/09/25 22:29:05 - mmengine - INFO - Iter(train) [5870/7452] base_lr: 1.4204e-06 lr: 1.4204e-06 eta: 0:32:00 time: 1.0142 data_time: 0.0089 memory: 16131 loss: 0.2314 tflops: 5.9040 tokens_per_sec: 97.6121 +2025/09/25 22:29:16 - mmengine - INFO - Iter(train) [5880/7452] base_lr: 1.4184e-06 lr: 1.4184e-06 eta: 0:31:48 time: 1.0157 data_time: 0.0091 memory: 16130 loss: 0.0123 tflops: 8.0432 tokens_per_sec: 132.9172 +2025/09/25 
22:29:27 - mmengine - INFO - Iter(train) [5890/7452] base_lr: 1.4164e-06 lr: 1.4164e-06 eta: 0:31:35 time: 1.0228 data_time: 0.0085 memory: 16131 loss: 2.4531 tflops: 5.6767 tokens_per_sec: 93.8584 +2025/09/25 22:29:38 - mmengine - INFO - Iter(train) [5900/7452] base_lr: 1.4144e-06 lr: 1.4144e-06 eta: 0:31:23 time: 1.0128 data_time: 0.0091 memory: 16130 loss: 1.7812 tflops: 4.4778 tokens_per_sec: 74.0552 +2025/09/25 22:29:49 - mmengine - INFO - Iter(train) [5910/7452] base_lr: 1.4123e-06 lr: 1.4123e-06 eta: 0:31:10 time: 1.0379 data_time: 0.0086 memory: 16130 loss: 0.0620 tflops: 5.3024 tokens_per_sec: 87.6749 +2025/09/25 22:30:00 - mmengine - INFO - Iter(train) [5920/7452] base_lr: 1.4103e-06 lr: 1.4103e-06 eta: 0:30:58 time: 1.0151 data_time: 0.0117 memory: 16131 loss: 0.0016 tflops: 7.0328 tokens_per_sec: 116.2461 +2025/09/25 22:30:10 - mmengine - INFO - Iter(train) [5930/7452] base_lr: 1.4083e-06 lr: 1.4083e-06 eta: 0:30:45 time: 1.0107 data_time: 0.0084 memory: 16131 loss: 1.7031 tflops: 6.0443 tokens_per_sec: 99.9293 +2025/09/25 22:30:21 - mmengine - INFO - Iter(train) [5940/7452] base_lr: 1.4063e-06 lr: 1.4063e-06 eta: 0:30:33 time: 1.0063 data_time: 0.0086 memory: 16131 loss: 0.0265 tflops: 6.7330 tokens_per_sec: 111.2988 +2025/09/25 22:30:32 - mmengine - INFO - Iter(train) [5950/7452] base_lr: 1.4042e-06 lr: 1.4042e-06 eta: 0:30:20 time: 1.0155 data_time: 0.0088 memory: 16131 loss: 0.0752 tflops: 6.8512 tokens_per_sec: 113.2479 +2025/09/25 22:30:43 - mmengine - INFO - Iter(train) [5960/7452] base_lr: 1.4022e-06 lr: 1.4022e-06 eta: 0:30:08 time: 1.0038 data_time: 0.0087 memory: 16131 loss: 0.0136 tflops: 5.3620 tokens_per_sec: 88.6622 +2025/09/25 22:30:53 - mmengine - INFO - Iter(train) [5970/7452] base_lr: 1.4002e-06 lr: 1.4002e-06 eta: 0:29:55 time: 1.0384 data_time: 0.0088 memory: 16130 loss: 0.0118 tflops: 6.2914 tokens_per_sec: 104.0047 +2025/09/25 22:31:04 - mmengine - INFO - Iter(train) [5980/7452] base_lr: 1.3981e-06 lr: 1.3981e-06 eta: 0:29:43 
time: 1.0335 data_time: 0.0084 memory: 16130 loss: 1.9688 tflops: 5.2079 tokens_per_sec: 86.1155 +2025/09/25 22:31:15 - mmengine - INFO - Iter(train) [5990/7452] base_lr: 1.3961e-06 lr: 1.3961e-06 eta: 0:29:30 time: 1.1932 data_time: 0.0086 memory: 16131 loss: 0.0488 tflops: 5.7289 tokens_per_sec: 94.6995 +2025/09/25 22:31:25 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 22:31:25 - mmengine - INFO - Iter(train) [6000/7452] base_lr: 1.3941e-06 lr: 1.3941e-06 eta: 0:29:18 time: 1.1023 data_time: 0.0088 memory: 16131 loss: 1.6562 tflops: 3.5651 tokens_per_sec: 58.9688 +2025/09/25 22:31:36 - mmengine - INFO - Iter(train) [6010/7452] base_lr: 1.3920e-06 lr: 1.3920e-06 eta: 0:29:06 time: 0.9960 data_time: 0.0086 memory: 16132 loss: 0.0297 tflops: 6.6201 tokens_per_sec: 109.4365 +2025/09/25 22:31:47 - mmengine - INFO - Iter(train) [6020/7452] base_lr: 1.3900e-06 lr: 1.3900e-06 eta: 0:28:53 time: 0.9894 data_time: 0.0084 memory: 16131 loss: 0.0198 tflops: 6.8481 tokens_per_sec: 113.2023 +2025/09/25 22:31:58 - mmengine - INFO - Iter(train) [6030/7452] base_lr: 1.3879e-06 lr: 1.3879e-06 eta: 0:28:41 time: 1.0048 data_time: 0.0084 memory: 16131 loss: 1.6094 tflops: 4.3926 tokens_per_sec: 72.6494 +2025/09/25 22:32:08 - mmengine - INFO - Iter(train) [6040/7452] base_lr: 1.3859e-06 lr: 1.3859e-06 eta: 0:28:28 time: 1.0164 data_time: 0.0083 memory: 16131 loss: 1.9922 tflops: 7.6798 tokens_per_sec: 126.9218 +2025/09/25 22:32:19 - mmengine - INFO - Iter(train) [6050/7452] base_lr: 1.3838e-06 lr: 1.3838e-06 eta: 0:28:16 time: 0.9980 data_time: 0.0087 memory: 16131 loss: 2.1406 tflops: 4.3621 tokens_per_sec: 72.1457 +2025/09/25 22:32:30 - mmengine - INFO - Iter(train) [6060/7452] base_lr: 1.3818e-06 lr: 1.3818e-06 eta: 0:28:03 time: 1.2954 data_time: 0.0086 memory: 16131 loss: 0.0052 tflops: 5.3706 tokens_per_sec: 88.7746 +2025/09/25 22:32:40 - mmengine - INFO - Iter(train) [6070/7452] base_lr: 1.3797e-06 lr: 1.3797e-06 eta: 0:27:51 time: 1.0110 
data_time: 0.0085 memory: 16131 loss: 0.0850 tflops: 7.1815 tokens_per_sec: 118.7001 +2025/09/25 22:32:51 - mmengine - INFO - Iter(train) [6080/7452] base_lr: 1.3777e-06 lr: 1.3777e-06 eta: 0:27:39 time: 1.2564 data_time: 0.0091 memory: 16130 loss: 0.0479 tflops: 5.3928 tokens_per_sec: 89.1444 +2025/09/25 22:33:02 - mmengine - INFO - Iter(train) [6090/7452] base_lr: 1.3756e-06 lr: 1.3756e-06 eta: 0:27:26 time: 1.3521 data_time: 0.0088 memory: 16130 loss: 2.4375 tflops: 4.6079 tokens_per_sec: 76.1792 +2025/09/25 22:33:13 - mmengine - INFO - Iter(train) [6100/7452] base_lr: 1.3736e-06 lr: 1.3736e-06 eta: 0:27:14 time: 1.0024 data_time: 0.0088 memory: 16133 loss: 0.0111 tflops: 6.4573 tokens_per_sec: 106.7484 +2025/09/25 22:33:23 - mmengine - INFO - Iter(train) [6110/7452] base_lr: 1.3715e-06 lr: 1.3715e-06 eta: 0:27:01 time: 1.0273 data_time: 0.0087 memory: 16130 loss: 0.0175 tflops: 8.0701 tokens_per_sec: 133.3581 +2025/09/25 22:33:35 - mmengine - INFO - Iter(train) [6120/7452] base_lr: 1.3694e-06 lr: 1.3694e-06 eta: 0:26:49 time: 1.0408 data_time: 0.0089 memory: 16131 loss: 2.5469 tflops: 5.3457 tokens_per_sec: 88.3895 +2025/09/25 22:33:45 - mmengine - INFO - Iter(train) [6130/7452] base_lr: 1.3674e-06 lr: 1.3674e-06 eta: 0:26:37 time: 1.0491 data_time: 0.0085 memory: 16131 loss: 0.0077 tflops: 7.0934 tokens_per_sec: 117.2389 +2025/09/25 22:33:56 - mmengine - INFO - Iter(train) [6140/7452] base_lr: 1.3653e-06 lr: 1.3653e-06 eta: 0:26:25 time: 0.9870 data_time: 0.0087 memory: 16131 loss: 2.3438 tflops: 4.8398 tokens_per_sec: 80.0380 +2025/09/25 22:34:01 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 22:34:01 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:34:08 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is smooth and regular, with no evidence of necrosis or hemorrhage. The tumor cells are well-differentiated and show a cohesive arrangement.<|im_end|> + +2025/09/25 22:34:08 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:34:58 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image of the biopsy specimen reveals a well-demarcated area of neoplastic tissue. The neoplastic cells are predominantly arranged in a tubular pattern, with some areas showing a papillary architecture. The neoplastic cells are moderately differentiated, with moderate nuclear atypia and occasional mitotic figures. The stroma surrounding the neoplastic cells is fibrotic. The tumor cells exhibit a high degree of cellular atypia, with marked nuclear pleomorphism and prominent nucleoli. The tumor cells are also characterized by a high mitotic rate, with frequent mitotic figures observed. The tumor cells are positive for estrogen receptor (ER) and progesterone receptor (PR), and negative for HER2/neu. The tumor cells are also positive for p53 and p16. 
The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. 
The tumor cells are positive + +2025/09/25 22:34:58 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:34:59 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 22:35:05 - mmengine - INFO - Iter(train) [6150/7452] base_lr: 1.3633e-06 lr: 1.3633e-06 eta: 0:26:24 time: 0.9959 data_time: 0.0082 memory: 16131 loss: 0.0645 tflops: 4.4318 tokens_per_sec: 73.2970 +2025/09/25 22:35:15 - mmengine - INFO - Iter(train) [6160/7452] base_lr: 1.3612e-06 lr: 1.3612e-06 eta: 0:26:12 time: 1.0062 data_time: 0.0088 memory: 16130 loss: 1.9531 tflops: 3.9655 tokens_per_sec: 65.5913 +2025/09/25 22:35:26 - mmengine - INFO - Iter(train) [6170/7452] base_lr: 1.3591e-06 lr: 1.3591e-06 eta: 0:25:59 time: 1.0106 data_time: 0.0087 memory: 16130 loss: 1.7266 tflops: 4.4274 tokens_per_sec: 73.2229 +2025/09/25 22:35:37 - mmengine - INFO - Iter(train) [6180/7452] base_lr: 1.3570e-06 lr: 1.3570e-06 eta: 0:25:47 time: 1.2078 data_time: 0.0085 memory: 16131 loss: 0.0131 tflops: 5.1583 tokens_per_sec: 85.2793 +2025/09/25 22:35:47 - mmengine - INFO - Iter(train) [6190/7452] base_lr: 1.3550e-06 lr: 1.3550e-06 eta: 0:25:34 time: 1.0095 data_time: 0.0088 memory: 16130 loss: 0.0099 tflops: 7.0717 tokens_per_sec: 116.8882 +2025/09/25 22:35:58 - mmengine - INFO - Iter(train) [6200/7452] base_lr: 1.3529e-06 lr: 1.3529e-06 eta: 0:25:22 time: 1.2446 data_time: 0.0083 memory: 16131 loss: 0.0029 tflops: 6.1740 tokens_per_sec: 102.0377 +2025/09/25 22:36:09 - mmengine - INFO - 
Iter(train) [6210/7452] base_lr: 1.3508e-06 lr: 1.3508e-06 eta: 0:25:10 time: 1.0065 data_time: 0.0086 memory: 16131 loss: 0.0216 tflops: 7.1528 tokens_per_sec: 118.2275 +2025/09/25 22:36:20 - mmengine - INFO - Iter(train) [6220/7452] base_lr: 1.3487e-06 lr: 1.3487e-06 eta: 0:24:57 time: 1.0076 data_time: 0.0088 memory: 16130 loss: 1.6953 tflops: 4.3805 tokens_per_sec: 72.4495 +2025/09/25 22:36:30 - mmengine - INFO - Iter(train) [6230/7452] base_lr: 1.3467e-06 lr: 1.3467e-06 eta: 0:24:45 time: 0.9966 data_time: 0.0088 memory: 16130 loss: 0.2197 tflops: 6.3732 tokens_per_sec: 105.3618 +2025/09/25 22:36:41 - mmengine - INFO - Iter(train) [6240/7452] base_lr: 1.3446e-06 lr: 1.3446e-06 eta: 0:24:32 time: 0.9943 data_time: 0.0089 memory: 16130 loss: 0.0361 tflops: 6.8144 tokens_per_sec: 112.6449 +2025/09/25 22:36:52 - mmengine - INFO - Iter(train) [6250/7452] base_lr: 1.3425e-06 lr: 1.3425e-06 eta: 0:24:20 time: 1.0406 data_time: 0.0090 memory: 16130 loss: 1.4531 tflops: 3.7764 tokens_per_sec: 62.4635 +2025/09/25 22:37:03 - mmengine - INFO - Iter(train) [6260/7452] base_lr: 1.3404e-06 lr: 1.3404e-06 eta: 0:24:07 time: 1.0367 data_time: 0.0087 memory: 16131 loss: 1.3516 tflops: 4.6080 tokens_per_sec: 76.2061 +2025/09/25 22:37:14 - mmengine - INFO - Iter(train) [6270/7452] base_lr: 1.3383e-06 lr: 1.3383e-06 eta: 0:23:55 time: 1.0098 data_time: 0.0090 memory: 16130 loss: 0.0008 tflops: 7.5495 tokens_per_sec: 124.7722 +2025/09/25 22:37:24 - mmengine - INFO - Iter(train) [6280/7452] base_lr: 1.3362e-06 lr: 1.3362e-06 eta: 0:23:43 time: 1.0823 data_time: 0.0086 memory: 16130 loss: 0.0048 tflops: 6.2605 tokens_per_sec: 103.4876 +2025/09/25 22:37:35 - mmengine - INFO - Iter(train) [6290/7452] base_lr: 1.3341e-06 lr: 1.3341e-06 eta: 0:23:30 time: 1.2591 data_time: 0.0085 memory: 16131 loss: 1.7422 tflops: 3.3614 tokens_per_sec: 55.5967 +2025/09/25 22:37:46 - mmengine - INFO - Iter(train) [6300/7452] base_lr: 1.3321e-06 lr: 1.3321e-06 eta: 0:23:18 time: 1.0124 data_time: 0.0086 
memory: 16131 loss: 0.0618 tflops: 6.5730 tokens_per_sec: 108.6565 +2025/09/25 22:37:57 - mmengine - INFO - Iter(train) [6310/7452] base_lr: 1.3300e-06 lr: 1.3300e-06 eta: 0:23:06 time: 1.0001 data_time: 0.0087 memory: 16131 loss: 2.0938 tflops: 5.3817 tokens_per_sec: 88.9889 +2025/09/25 22:38:07 - mmengine - INFO - Iter(train) [6320/7452] base_lr: 1.3279e-06 lr: 1.3279e-06 eta: 0:22:53 time: 1.0016 data_time: 0.0085 memory: 16131 loss: 1.8594 tflops: 5.7365 tokens_per_sec: 94.8483 +2025/09/25 22:38:18 - mmengine - INFO - Iter(train) [6330/7452] base_lr: 1.3258e-06 lr: 1.3258e-06 eta: 0:22:41 time: 1.0275 data_time: 0.0085 memory: 16131 loss: 1.7344 tflops: 8.7770 tokens_per_sec: 145.0150 +2025/09/25 22:38:29 - mmengine - INFO - Iter(train) [6340/7452] base_lr: 1.3237e-06 lr: 1.3237e-06 eta: 0:22:28 time: 1.0136 data_time: 0.0090 memory: 16130 loss: 0.9219 tflops: 3.5190 tokens_per_sec: 58.2107 +2025/09/25 22:38:40 - mmengine - INFO - Iter(train) [6350/7452] base_lr: 1.3216e-06 lr: 1.3216e-06 eta: 0:22:16 time: 1.0353 data_time: 0.0088 memory: 16130 loss: 1.3906 tflops: 3.9712 tokens_per_sec: 65.6841 +2025/09/25 22:38:51 - mmengine - INFO - Iter(train) [6360/7452] base_lr: 1.3195e-06 lr: 1.3195e-06 eta: 0:22:04 time: 1.0195 data_time: 0.0085 memory: 16131 loss: 1.8281 tflops: 9.4999 tokens_per_sec: 156.9364 +2025/09/25 22:39:01 - mmengine - INFO - Iter(train) [6370/7452] base_lr: 1.3174e-06 lr: 1.3174e-06 eta: 0:21:51 time: 1.0133 data_time: 0.0091 memory: 16131 loss: 2.0312 tflops: 7.8827 tokens_per_sec: 130.2689 +2025/09/25 22:39:12 - mmengine - INFO - Iter(train) [6380/7452] base_lr: 1.3153e-06 lr: 1.3153e-06 eta: 0:21:39 time: 0.9921 data_time: 0.0091 memory: 16133 loss: 2.7344 tflops: 5.7304 tokens_per_sec: 94.7486 +2025/09/25 22:39:23 - mmengine - INFO - Iter(train) [6390/7452] base_lr: 1.3132e-06 lr: 1.3132e-06 eta: 0:21:27 time: 1.0183 data_time: 0.0083 memory: 16131 loss: 1.9297 tflops: 2.9681 tokens_per_sec: 49.1035 +2025/09/25 22:39:34 - mmengine - INFO 
- Iter(train) [6400/7452] base_lr: 1.3110e-06 lr: 1.3110e-06 eta: 0:21:14 time: 1.2746 data_time: 0.0090 memory: 16131 loss: 0.0005 tflops: 4.9356 tokens_per_sec: 81.5967 +2025/09/25 22:39:44 - mmengine - INFO - Iter(train) [6410/7452] base_lr: 1.3089e-06 lr: 1.3089e-06 eta: 0:21:02 time: 1.0199 data_time: 0.0087 memory: 16131 loss: 1.2891 tflops: 4.8026 tokens_per_sec: 79.4218 +2025/09/25 22:39:55 - mmengine - INFO - Iter(train) [6420/7452] base_lr: 1.3068e-06 lr: 1.3068e-06 eta: 0:20:50 time: 1.0135 data_time: 0.0086 memory: 16131 loss: 0.0037 tflops: 6.7448 tokens_per_sec: 111.4928 +2025/09/25 22:40:05 - mmengine - INFO - Iter(train) [6430/7452] base_lr: 1.3047e-06 lr: 1.3047e-06 eta: 0:20:37 time: 1.2528 data_time: 0.0083 memory: 16132 loss: 2.2500 tflops: 4.8281 tokens_per_sec: 79.8234 +2025/09/25 22:40:16 - mmengine - INFO - Iter(train) [6440/7452] base_lr: 1.3026e-06 lr: 1.3026e-06 eta: 0:20:25 time: 0.9982 data_time: 0.0087 memory: 16130 loss: 1.5547 tflops: 3.8155 tokens_per_sec: 63.1131 +2025/09/25 22:40:27 - mmengine - INFO - Iter(train) [6450/7452] base_lr: 1.3005e-06 lr: 1.3005e-06 eta: 0:20:12 time: 1.0220 data_time: 0.0090 memory: 16131 loss: 0.0047 tflops: 7.2227 tokens_per_sec: 119.3777 +2025/09/25 22:40:37 - mmengine - INFO - Iter(train) [6460/7452] base_lr: 1.2984e-06 lr: 1.2984e-06 eta: 0:20:00 time: 1.0112 data_time: 0.0086 memory: 16131 loss: 1.7266 tflops: 4.4246 tokens_per_sec: 73.1771 +2025/09/25 22:40:48 - mmengine - INFO - Iter(train) [6470/7452] base_lr: 1.2962e-06 lr: 1.2962e-06 eta: 0:19:48 time: 1.0081 data_time: 0.0085 memory: 16130 loss: 0.0060 tflops: 6.8410 tokens_per_sec: 113.0812 +2025/09/25 22:40:59 - mmengine - INFO - Iter(train) [6480/7452] base_lr: 1.2941e-06 lr: 1.2941e-06 eta: 0:19:36 time: 1.0155 data_time: 0.0084 memory: 16131 loss: 1.1484 tflops: 4.7043 tokens_per_sec: 77.7972 +2025/09/25 22:41:09 - mmengine - INFO - Iter(train) [6490/7452] base_lr: 1.2920e-06 lr: 1.2920e-06 eta: 0:19:23 time: 1.0111 data_time: 0.0083 
memory: 16131 loss: 1.6406 tflops: 4.7244 tokens_per_sec: 78.1296 +2025/09/25 22:41:20 - mmengine - INFO - Iter(train) [6500/7452] base_lr: 1.2899e-06 lr: 1.2899e-06 eta: 0:19:11 time: 1.2249 data_time: 0.0091 memory: 16130 loss: 1.8125 tflops: 5.2840 tokens_per_sec: 87.3524 +2025/09/25 22:41:31 - mmengine - INFO - Iter(train) [6510/7452] base_lr: 1.2878e-06 lr: 1.2878e-06 eta: 0:18:59 time: 1.0251 data_time: 0.0088 memory: 16131 loss: 2.0625 tflops: 6.4323 tokens_per_sec: 106.3328 +2025/09/25 22:41:41 - mmengine - INFO - Iter(train) [6520/7452] base_lr: 1.2856e-06 lr: 1.2856e-06 eta: 0:18:46 time: 1.0141 data_time: 0.0085 memory: 16131 loss: 2.0000 tflops: 4.3524 tokens_per_sec: 71.9848 +2025/09/25 22:41:53 - mmengine - INFO - Iter(train) [6530/7452] base_lr: 1.2835e-06 lr: 1.2835e-06 eta: 0:18:34 time: 1.2550 data_time: 0.0083 memory: 16131 loss: 0.1328 tflops: 4.8679 tokens_per_sec: 80.4798 +2025/09/25 22:42:03 - mmengine - INFO - Iter(train) [6540/7452] base_lr: 1.2814e-06 lr: 1.2814e-06 eta: 0:18:22 time: 1.0064 data_time: 0.0088 memory: 16131 loss: 2.2500 tflops: 6.1909 tokens_per_sec: 102.3498 +2025/09/25 22:42:14 - mmengine - INFO - Iter(train) [6550/7452] base_lr: 1.2792e-06 lr: 1.2792e-06 eta: 0:18:09 time: 1.0262 data_time: 0.0088 memory: 16131 loss: 0.0077 tflops: 8.0196 tokens_per_sec: 132.5251 +2025/09/25 22:42:25 - mmengine - INFO - Iter(train) [6560/7452] base_lr: 1.2771e-06 lr: 1.2771e-06 eta: 0:17:57 time: 1.3350 data_time: 0.0088 memory: 16131 loss: 0.0225 tflops: 4.6668 tokens_per_sec: 77.1531 +2025/09/25 22:42:35 - mmengine - INFO - Iter(train) [6570/7452] base_lr: 1.2750e-06 lr: 1.2750e-06 eta: 0:17:45 time: 1.0312 data_time: 0.0085 memory: 16131 loss: 0.0243 tflops: 6.0420 tokens_per_sec: 99.8881 +2025/09/25 22:42:46 - mmengine - INFO - Iter(train) [6580/7452] base_lr: 1.2728e-06 lr: 1.2728e-06 eta: 0:17:33 time: 1.2481 data_time: 0.0088 memory: 16131 loss: 1.7109 tflops: 3.5365 tokens_per_sec: 58.4902 +2025/09/25 22:42:57 - mmengine - INFO - 
Iter(train) [6590/7452] base_lr: 1.2707e-06 lr: 1.2707e-06 eta: 0:17:20 time: 1.0192 data_time: 0.0095 memory: 16131 loss: 2.2969 tflops: 6.6481 tokens_per_sec: 109.8952 +2025/09/25 22:43:08 - mmengine - INFO - Iter(train) [6600/7452] base_lr: 1.2686e-06 lr: 1.2686e-06 eta: 0:17:08 time: 0.9947 data_time: 0.0085 memory: 16131 loss: 0.1118 tflops: 6.2026 tokens_per_sec: 102.5444 +2025/09/25 22:43:18 - mmengine - INFO - Iter(train) [6610/7452] base_lr: 1.2664e-06 lr: 1.2664e-06 eta: 0:16:56 time: 1.0159 data_time: 0.0086 memory: 16130 loss: 1.9609 tflops: 4.0471 tokens_per_sec: 66.9387 +2025/09/25 22:43:29 - mmengine - INFO - Iter(train) [6620/7452] base_lr: 1.2643e-06 lr: 1.2643e-06 eta: 0:16:44 time: 1.0078 data_time: 0.0090 memory: 16130 loss: 0.0014 tflops: 6.7834 tokens_per_sec: 112.1301 +2025/09/25 22:43:39 - mmengine - INFO - Iter(train) [6630/7452] base_lr: 1.2622e-06 lr: 1.2622e-06 eta: 0:16:31 time: 1.2105 data_time: 0.0088 memory: 16131 loss: 1.7578 tflops: 4.6964 tokens_per_sec: 77.6526 +2025/09/25 22:43:50 - mmengine - INFO - Iter(train) [6640/7452] base_lr: 1.2600e-06 lr: 1.2600e-06 eta: 0:16:19 time: 1.0113 data_time: 0.0089 memory: 16130 loss: 0.0239 tflops: 5.8614 tokens_per_sec: 96.9090 +2025/09/25 22:44:00 - mmengine - INFO - Iter(train) [6650/7452] base_lr: 1.2579e-06 lr: 1.2579e-06 eta: 0:16:07 time: 1.0287 data_time: 0.0086 memory: 16131 loss: 1.5078 tflops: 4.1140 tokens_per_sec: 68.0443 +2025/09/25 22:44:07 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 22:44:07 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:44:11 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. 
The lobules are arranged in a manner that resembles the lobular structures of normal breast tissue.<|im_end|> + +2025/09/25 22:44:11 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:45:00 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image reveals a well-demarcated, moderately sized neoplasm composed of neoplastic cells with a high nuclear grade, indicative of a poorly differentiated carcinoma. The neoplastic cells exhibit marked pleomorphism, with irregular nuclear contours and prominent nucleoli. The tumor cells are arranged in sheets and nests, with areas of glandular and cribriform differentiation. The tumor cells show brisk mitotic activity, with numerous mitotic figures observed. The stroma surrounding the tumor is fibrotic, with areas of necrosis present. The tumor cells infiltrate the surrounding tissue, including the muscularis mucosa and submucosa. The tumor cells are positive for cytokeratin and negative for CD34, CD117, and CD10. The tumor cells are negative for estrogen receptor (ER), progesterone receptor (PR), and HER2/neu. The tumor cells are positive for p53 and p16. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. 
The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative for p16. The tumor cells are positive for p53. The tumor cells are negative + +2025/09/25 22:45:00 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:45:01 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 22:45:08 - mmengine - INFO - Iter(train) [6660/7452] base_lr: 1.2557e-06 lr: 1.2557e-06 eta: 0:16:01 time: 1.0015 data_time: 0.0083 memory: 16131 loss: 0.0019 tflops: 7.3703 tokens_per_sec: 121.8177 +2025/09/25 22:45:18 - mmengine - INFO - Iter(train) [6670/7452] base_lr: 1.2536e-06 lr: 1.2536e-06 eta: 0:15:49 time: 1.0019 data_time: 0.0090 memory: 16131 loss: 0.0011 tflops: 7.9119 tokens_per_sec: 130.7536 +2025/09/25 22:45:29 - mmengine - INFO - Iter(train) [6680/7452] base_lr: 1.2514e-06 lr: 1.2514e-06 eta: 0:15:37 time: 1.3079 data_time: 0.0092 memory: 16131 loss: 1.3047 tflops: 3.3285 tokens_per_sec: 55.0505 +2025/09/25 22:45:40 - mmengine - INFO - Iter(train) [6690/7452] base_lr: 1.2493e-06 lr: 1.2493e-06 eta: 0:15:24 time: 1.0200 data_time: 0.0084 memory: 16131 loss: 0.0035 tflops: 6.8207 tokens_per_sec: 112.7440 +2025/09/25 22:45:50 - mmengine - INFO - Iter(train) [6700/7452] base_lr: 1.2471e-06 lr: 1.2471e-06 eta: 0:15:12 time: 1.0109 data_time: 0.0092 memory: 16131 loss: 1.4062 tflops: 4.9652 tokens_per_sec: 82.1076 +2025/09/25 22:46:01 - mmengine - INFO - Iter(train) [6710/7452] base_lr: 1.2450e-06 lr: 1.2450e-06 eta: 0:15:00 time: 1.0035 data_time: 0.0083 memory: 16130 loss: 1.4062 tflops: 4.0367 tokens_per_sec: 66.7687 +2025/09/25 22:46:12 - mmengine - INFO - Iter(train) [6720/7452] base_lr: 1.2428e-06 lr: 1.2428e-06 eta: 0:14:48 time: 1.2854 data_time: 0.0087 memory: 16133 loss: 0.0053 tflops: 6.3553 tokens_per_sec: 105.0242 +2025/09/25 22:46:23 - mmengine - INFO - Iter(train) [6730/7452] base_lr: 1.2407e-06 lr: 1.2407e-06 eta: 0:14:35 time: 1.2474 data_time: 0.0090 memory: 16131 loss: 1.9453 tflops: 5.8200 tokens_per_sec: 96.1967 +2025/09/25 22:46:33 - 
mmengine - INFO - Iter(train) [6740/7452] base_lr: 1.2385e-06 lr: 1.2385e-06 eta: 0:14:23 time: 1.0115 data_time: 0.0082 memory: 16131 loss: 2.1719 tflops: 5.5609 tokens_per_sec: 91.9467 +2025/09/25 22:46:44 - mmengine - INFO - Iter(train) [6750/7452] base_lr: 1.2364e-06 lr: 1.2364e-06 eta: 0:14:11 time: 1.0316 data_time: 0.0093 memory: 16130 loss: 1.7656 tflops: 4.2199 tokens_per_sec: 69.7935 +2025/09/25 22:46:55 - mmengine - INFO - Iter(train) [6760/7452] base_lr: 1.2342e-06 lr: 1.2342e-06 eta: 0:13:58 time: 1.0198 data_time: 0.0082 memory: 16130 loss: 1.5000 tflops: 4.4467 tokens_per_sec: 73.5421 +2025/09/25 22:47:05 - mmengine - INFO - Iter(train) [6770/7452] base_lr: 1.2321e-06 lr: 1.2321e-06 eta: 0:13:46 time: 0.9878 data_time: 0.0084 memory: 16131 loss: 1.7812 tflops: 4.2847 tokens_per_sec: 70.8675 +2025/09/25 22:47:16 - mmengine - INFO - Iter(train) [6780/7452] base_lr: 1.2299e-06 lr: 1.2299e-06 eta: 0:13:34 time: 1.0098 data_time: 0.0088 memory: 16132 loss: 1.9453 tflops: 4.3710 tokens_per_sec: 72.2910 +2025/09/25 22:47:27 - mmengine - INFO - Iter(train) [6790/7452] base_lr: 1.2277e-06 lr: 1.2277e-06 eta: 0:13:22 time: 1.0321 data_time: 0.0089 memory: 16131 loss: 1.9375 tflops: 6.7408 tokens_per_sec: 111.4227 +2025/09/25 22:47:38 - mmengine - INFO - Iter(train) [6800/7452] base_lr: 1.2256e-06 lr: 1.2256e-06 eta: 0:13:09 time: 1.0122 data_time: 0.0085 memory: 16130 loss: 1.6953 tflops: 5.1383 tokens_per_sec: 84.9669 +2025/09/25 22:47:48 - mmengine - INFO - Iter(train) [6810/7452] base_lr: 1.2234e-06 lr: 1.2234e-06 eta: 0:12:57 time: 1.0086 data_time: 0.0089 memory: 16131 loss: 1.9766 tflops: 4.2562 tokens_per_sec: 70.3956 +2025/09/25 22:47:59 - mmengine - INFO - Iter(train) [6820/7452] base_lr: 1.2213e-06 lr: 1.2213e-06 eta: 0:12:45 time: 1.0158 data_time: 0.0085 memory: 16130 loss: 2.3281 tflops: 6.6702 tokens_per_sec: 110.2608 +2025/09/25 22:48:10 - mmengine - INFO - Iter(train) [6830/7452] base_lr: 1.2191e-06 lr: 1.2191e-06 eta: 0:12:33 time: 0.9840 
data_time: 0.0074 memory: 16131 loss: 1.6172 tflops: 5.5928 tokens_per_sec: 92.4771 +2025/09/25 22:48:20 - mmengine - INFO - Iter(train) [6840/7452] base_lr: 1.2169e-06 lr: 1.2169e-06 eta: 0:12:20 time: 1.0163 data_time: 0.0090 memory: 16131 loss: 0.1118 tflops: 6.1896 tokens_per_sec: 102.3274 +2025/09/25 22:48:31 - mmengine - INFO - Iter(train) [6850/7452] base_lr: 1.2148e-06 lr: 1.2148e-06 eta: 0:12:08 time: 1.0104 data_time: 0.0087 memory: 16131 loss: 2.4375 tflops: 5.6867 tokens_per_sec: 94.0243 +2025/09/25 22:48:42 - mmengine - INFO - Iter(train) [6860/7452] base_lr: 1.2126e-06 lr: 1.2126e-06 eta: 0:11:56 time: 1.0025 data_time: 0.0076 memory: 16131 loss: 0.0019 tflops: 8.5725 tokens_per_sec: 141.6500 +2025/09/25 22:48:53 - mmengine - INFO - Iter(train) [6870/7452] base_lr: 1.2104e-06 lr: 1.2104e-06 eta: 0:11:44 time: 1.2347 data_time: 0.0085 memory: 16130 loss: 1.5781 tflops: 5.3895 tokens_per_sec: 89.0930 +2025/09/25 22:49:03 - mmengine - INFO - Iter(train) [6880/7452] base_lr: 1.2083e-06 lr: 1.2083e-06 eta: 0:11:32 time: 1.0379 data_time: 0.0083 memory: 16131 loss: 0.0013 tflops: 7.6373 tokens_per_sec: 126.2149 +2025/09/25 22:49:14 - mmengine - INFO - Iter(train) [6890/7452] base_lr: 1.2061e-06 lr: 1.2061e-06 eta: 0:11:19 time: 1.0379 data_time: 0.0086 memory: 16131 loss: 1.7656 tflops: 4.7776 tokens_per_sec: 79.0062 +2025/09/25 22:49:25 - mmengine - INFO - Iter(train) [6900/7452] base_lr: 1.2039e-06 lr: 1.2039e-06 eta: 0:11:07 time: 1.0156 data_time: 0.0090 memory: 16131 loss: 0.0114 tflops: 7.7451 tokens_per_sec: 127.9991 +2025/09/25 22:49:36 - mmengine - INFO - Iter(train) [6910/7452] base_lr: 1.2017e-06 lr: 1.2017e-06 eta: 0:10:55 time: 1.0266 data_time: 0.0086 memory: 16131 loss: 0.0327 tflops: 7.4261 tokens_per_sec: 122.7332 +2025/09/25 22:49:47 - mmengine - INFO - Iter(train) [6920/7452] base_lr: 1.1996e-06 lr: 1.1996e-06 eta: 0:10:43 time: 1.0217 data_time: 0.0087 memory: 16130 loss: 0.0923 tflops: 6.5130 tokens_per_sec: 107.6651 +2025/09/25 
22:49:58 - mmengine - INFO - Iter(train) [6930/7452] base_lr: 1.1974e-06 lr: 1.1974e-06 eta: 0:10:31 time: 1.0247 data_time: 0.0092 memory: 16132 loss: 0.0157 tflops: 5.2526 tokens_per_sec: 86.8531 +2025/09/25 22:50:09 - mmengine - INFO - Iter(train) [6940/7452] base_lr: 1.1952e-06 lr: 1.1952e-06 eta: 0:10:18 time: 1.0118 data_time: 0.0082 memory: 16130 loss: 1.8984 tflops: 6.1573 tokens_per_sec: 101.7951 +2025/09/25 22:50:19 - mmengine - INFO - Iter(train) [6950/7452] base_lr: 1.1930e-06 lr: 1.1930e-06 eta: 0:10:06 time: 1.0236 data_time: 0.0089 memory: 16132 loss: 0.0067 tflops: 7.2108 tokens_per_sec: 119.1814 +2025/09/25 22:50:30 - mmengine - INFO - Iter(train) [6960/7452] base_lr: 1.1909e-06 lr: 1.1909e-06 eta: 0:09:54 time: 1.2292 data_time: 0.0090 memory: 16133 loss: 1.2891 tflops: 3.3445 tokens_per_sec: 55.3189 +2025/09/25 22:50:41 - mmengine - INFO - Iter(train) [6970/7452] base_lr: 1.1887e-06 lr: 1.1887e-06 eta: 0:09:42 time: 1.0134 data_time: 0.0084 memory: 16131 loss: 1.6328 tflops: 5.5500 tokens_per_sec: 91.7668 +2025/09/25 22:50:52 - mmengine - INFO - Iter(train) [6980/7452] base_lr: 1.1865e-06 lr: 1.1865e-06 eta: 0:09:30 time: 1.0289 data_time: 0.0086 memory: 16131 loss: 2.4531 tflops: 6.2317 tokens_per_sec: 103.0203 +2025/09/25 22:51:03 - mmengine - INFO - Iter(train) [6990/7452] base_lr: 1.1843e-06 lr: 1.1843e-06 eta: 0:09:18 time: 1.2919 data_time: 0.0090 memory: 16131 loss: 1.9062 tflops: 5.1977 tokens_per_sec: 85.9215 +2025/09/25 22:51:14 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 22:51:14 - mmengine - INFO - Iter(train) [7000/7452] base_lr: 1.1821e-06 lr: 1.1821e-06 eta: 0:09:05 time: 1.0343 data_time: 0.0082 memory: 16130 loss: 1.5312 tflops: 4.1503 tokens_per_sec: 68.6430 +2025/09/25 22:51:24 - mmengine - INFO - Iter(train) [7010/7452] base_lr: 1.1800e-06 lr: 1.1800e-06 eta: 0:08:53 time: 0.9977 data_time: 0.0087 memory: 16131 loss: 0.0334 tflops: 5.5162 tokens_per_sec: 91.2103 +2025/09/25 22:51:35 - 
mmengine - INFO - Iter(train) [7020/7452] base_lr: 1.1778e-06 lr: 1.1778e-06 eta: 0:08:41 time: 1.1981 data_time: 0.0086 memory: 16131 loss: 1.8359 tflops: 7.8305 tokens_per_sec: 129.3666 +2025/09/25 22:51:46 - mmengine - INFO - Iter(train) [7030/7452] base_lr: 1.1756e-06 lr: 1.1756e-06 eta: 0:08:29 time: 1.0255 data_time: 0.0090 memory: 16131 loss: 0.0035 tflops: 7.1979 tokens_per_sec: 118.9684 +2025/09/25 22:51:57 - mmengine - INFO - Iter(train) [7040/7452] base_lr: 1.1734e-06 lr: 1.1734e-06 eta: 0:08:17 time: 1.2671 data_time: 0.0087 memory: 16131 loss: 0.0039 tflops: 6.7345 tokens_per_sec: 111.2813 +2025/09/25 22:52:07 - mmengine - INFO - Iter(train) [7050/7452] base_lr: 1.1712e-06 lr: 1.1712e-06 eta: 0:08:05 time: 1.0657 data_time: 0.0087 memory: 16130 loss: 1.8750 tflops: 5.3915 tokens_per_sec: 89.1428 +2025/09/25 22:52:18 - mmengine - INFO - Iter(train) [7060/7452] base_lr: 1.1690e-06 lr: 1.1690e-06 eta: 0:07:53 time: 1.0214 data_time: 0.0087 memory: 16130 loss: 1.5000 tflops: 5.6252 tokens_per_sec: 93.0071 +2025/09/25 22:52:29 - mmengine - INFO - Iter(train) [7070/7452] base_lr: 1.1668e-06 lr: 1.1668e-06 eta: 0:07:40 time: 1.2628 data_time: 0.0090 memory: 16131 loss: 0.0112 tflops: 5.6533 tokens_per_sec: 93.4440 +2025/09/25 22:52:40 - mmengine - INFO - Iter(train) [7080/7452] base_lr: 1.1647e-06 lr: 1.1647e-06 eta: 0:07:28 time: 1.2539 data_time: 0.0089 memory: 16131 loss: 0.0047 tflops: 5.8385 tokens_per_sec: 96.5011 +2025/09/25 22:52:50 - mmengine - INFO - Iter(train) [7090/7452] base_lr: 1.1625e-06 lr: 1.1625e-06 eta: 0:07:16 time: 1.0162 data_time: 0.0087 memory: 16130 loss: 1.3750 tflops: 4.4030 tokens_per_sec: 72.8201 +2025/09/25 22:53:01 - mmengine - INFO - Iter(train) [7100/7452] base_lr: 1.1603e-06 lr: 1.1603e-06 eta: 0:07:04 time: 1.0333 data_time: 0.0086 memory: 16131 loss: 2.4219 tflops: 6.3227 tokens_per_sec: 104.5228 +2025/09/25 22:53:12 - mmengine - INFO - Iter(train) [7110/7452] base_lr: 1.1581e-06 lr: 1.1581e-06 eta: 0:06:52 time: 1.2934 
data_time: 0.0089 memory: 16131 loss: 2.0938 tflops: 3.8338 tokens_per_sec: 63.3990 +2025/09/25 22:53:23 - mmengine - INFO - Iter(train) [7120/7452] base_lr: 1.1559e-06 lr: 1.1559e-06 eta: 0:06:40 time: 1.0114 data_time: 0.0087 memory: 16130 loss: 1.5156 tflops: 5.0823 tokens_per_sec: 84.0428 +2025/09/25 22:53:33 - mmengine - INFO - Iter(train) [7130/7452] base_lr: 1.1537e-06 lr: 1.1537e-06 eta: 0:06:28 time: 0.9708 data_time: 0.0075 memory: 16131 loss: 0.0342 tflops: 6.6048 tokens_per_sec: 109.1890 +2025/09/25 22:53:44 - mmengine - INFO - Iter(train) [7140/7452] base_lr: 1.1515e-06 lr: 1.1515e-06 eta: 0:06:16 time: 1.2588 data_time: 0.0086 memory: 16130 loss: 1.5234 tflops: 3.6507 tokens_per_sec: 60.3769 +2025/09/25 22:53:54 - mmengine - INFO - Iter(train) [7150/7452] base_lr: 1.1493e-06 lr: 1.1493e-06 eta: 0:06:03 time: 1.0187 data_time: 0.0083 memory: 16130 loss: 0.0562 tflops: 6.7703 tokens_per_sec: 111.9121 +2025/09/25 22:54:05 - mmengine - INFO - Iter(train) [7160/7452] base_lr: 1.1471e-06 lr: 1.1471e-06 eta: 0:05:51 time: 0.9587 data_time: 0.0076 memory: 16131 loss: 0.0128 tflops: 6.6250 tokens_per_sec: 109.5240 +2025/09/25 22:54:14 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 22:54:14 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:54:18 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. 
The lobules are arranged in a manner that resembles the lobular structures of normal breast tissue.<|im_end|> + +2025/09/25 22:54:18 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:55:09 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image reveals a well-demarcated area of neoplastic tissue, characterized by the presence of neoplastic cells with a high nuclear grade, marked pleomorphism, and a high mitotic rate. The neoplastic cells exhibit a high nuclear grade, with prominent nucleoli and irregular nuclear contours. The tumor cells are arranged in sheets and nests, with areas of coalescence and areas of necrosis. The tumor cells show marked pleomorphism, with significant variation in cell size and shape. The mitotic rate is high, with numerous mitotic figures observed. The tumor cells are also characterized by a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. 
The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear atypia, with irregular nuclear contours and prominent nucleoli. The tumor cells are also noted to have a high degree of anisotropy, with irregular cell membranes and cytoplasmic inclusions. The tumor cells are also noted to have a high degree of nuclear at + +2025/09/25 22:55:09 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 22:55:10 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 22:55:14 - mmengine - INFO - Iter(train) [7170/7452] base_lr: 1.1449e-06 lr: 1.1449e-06 eta: 0:05:41 time: 1.0350 data_time: 0.0087 memory: 16131 loss: 0.0077 tflops: 6.6048 tokens_per_sec: 109.1779 +2025/09/25 22:55:24 - mmengine - INFO - Iter(train) [7180/7452] base_lr: 1.1427e-06 lr: 1.1427e-06 eta: 0:05:29 time: 1.0313 data_time: 0.0086 memory: 16131 loss: 1.6250 tflops: 6.6873 tokens_per_sec: 110.5402 +2025/09/25 22:55:35 - mmengine - INFO - Iter(train) [7190/7452] base_lr: 1.1405e-06 lr: 1.1405e-06 eta: 0:05:17 time: 1.2785 data_time: 0.0084 memory: 16131 loss: 1.4453 tflops: 2.9318 tokens_per_sec: 48.4962 +2025/09/25 22:55:45 - mmengine - INFO - Iter(train) [7200/7452] base_lr: 1.1383e-06 lr: 1.1383e-06 eta: 0:05:05 time: 1.1909 data_time: 0.0087 memory: 16131 loss: 1.2266 tflops: 6.3506 tokens_per_sec: 104.9592 +2025/09/25 22:55:56 - mmengine - INFO - Iter(train) [7210/7452] base_lr: 1.1361e-06 lr: 1.1361e-06 eta: 0:04:53 time: 1.0246 data_time: 0.0084 memory: 16131 loss: 0.0142 tflops: 6.7902 tokens_per_sec: 112.2394 +2025/09/25 22:56:07 - mmengine - INFO - Iter(train) [7220/7452] base_lr: 1.1339e-06 lr: 1.1339e-06 eta: 0:04:41 time: 1.0156 data_time: 0.0090 memory: 16131 loss: 0.0317 tflops: 7.9838 tokens_per_sec: 131.9361 +2025/09/25 22:56:18 - mmengine - INFO - Iter(train) [7230/7452] base_lr: 1.1317e-06 lr: 1.1317e-06 eta: 0:04:28 time: 1.0287 data_time: 0.0089 memory: 16131 loss: 1.7109 tflops: 5.1732 tokens_per_sec: 85.5416 +2025/09/25 22:56:29 - mmengine - INFO - Iter(train) [7240/7452] base_lr: 1.1295e-06 lr: 1.1295e-06 eta: 0:04:16 time: 1.0184 data_time: 0.0087 memory: 16130 loss: 2.0000 tflops: 4.1557 tokens_per_sec: 68.7329 +2025/09/25 22:56:39 - 
mmengine - INFO - Iter(train) [7250/7452] base_lr: 1.1273e-06 lr: 1.1273e-06 eta: 0:04:04 time: 1.0010 data_time: 0.0093 memory: 16131 loss: 0.0133 tflops: 7.1921 tokens_per_sec: 118.8765 +2025/09/25 22:56:50 - mmengine - INFO - Iter(train) [7260/7452] base_lr: 1.1251e-06 lr: 1.1251e-06 eta: 0:03:52 time: 1.0140 data_time: 0.0090 memory: 16130 loss: 2.3750 tflops: 6.8613 tokens_per_sec: 113.4158 +2025/09/25 22:57:00 - mmengine - INFO - Iter(train) [7270/7452] base_lr: 1.1229e-06 lr: 1.1229e-06 eta: 0:03:40 time: 1.0033 data_time: 0.0082 memory: 16131 loss: 1.6250 tflops: 8.2032 tokens_per_sec: 135.5581 +2025/09/25 22:57:11 - mmengine - INFO - Iter(train) [7280/7452] base_lr: 1.1207e-06 lr: 1.1207e-06 eta: 0:03:28 time: 0.9987 data_time: 0.0086 memory: 16131 loss: 0.0254 tflops: 7.6947 tokens_per_sec: 127.1703 +2025/09/25 22:57:21 - mmengine - INFO - Iter(train) [7290/7452] base_lr: 1.1185e-06 lr: 1.1185e-06 eta: 0:03:16 time: 1.0672 data_time: 0.0087 memory: 16131 loss: 0.0315 tflops: 5.4406 tokens_per_sec: 89.9550 +2025/09/25 22:57:32 - mmengine - INFO - Iter(train) [7300/7452] base_lr: 1.1163e-06 lr: 1.1163e-06 eta: 0:03:03 time: 1.0142 data_time: 0.0086 memory: 16130 loss: 0.1729 tflops: 6.1430 tokens_per_sec: 101.5580 +2025/09/25 22:57:43 - mmengine - INFO - Iter(train) [7310/7452] base_lr: 1.1141e-06 lr: 1.1141e-06 eta: 0:02:51 time: 1.0428 data_time: 0.0087 memory: 16130 loss: 2.0000 tflops: 4.9872 tokens_per_sec: 82.4687 +2025/09/25 22:57:54 - mmengine - INFO - Iter(train) [7320/7452] base_lr: 1.1119e-06 lr: 1.1119e-06 eta: 0:02:39 time: 1.3713 data_time: 0.0083 memory: 16130 loss: 1.9531 tflops: 3.7924 tokens_per_sec: 62.7121 +2025/09/25 22:58:05 - mmengine - INFO - Iter(train) [7330/7452] base_lr: 1.1097e-06 lr: 1.1097e-06 eta: 0:02:27 time: 1.2626 data_time: 0.0085 memory: 16131 loss: 2.2188 tflops: 3.0644 tokens_per_sec: 50.6875 +2025/09/25 22:58:15 - mmengine - INFO - Iter(train) [7340/7452] base_lr: 1.1075e-06 lr: 1.1075e-06 eta: 0:02:15 time: 1.0008 
data_time: 0.0085 memory: 16131 loss: 0.0354 tflops: 7.8601 tokens_per_sec: 129.8989 +2025/09/25 22:58:26 - mmengine - INFO - Iter(train) [7350/7452] base_lr: 1.1053e-06 lr: 1.1053e-06 eta: 0:02:03 time: 1.0189 data_time: 0.0087 memory: 16131 loss: 0.0483 tflops: 5.4606 tokens_per_sec: 90.2902 +2025/09/25 22:58:36 - mmengine - INFO - Iter(train) [7360/7452] base_lr: 1.1031e-06 lr: 1.1031e-06 eta: 0:01:51 time: 1.3049 data_time: 0.0084 memory: 16130 loss: 0.0053 tflops: 5.6103 tokens_per_sec: 92.7290 +2025/09/25 22:58:47 - mmengine - INFO - Iter(train) [7370/7452] base_lr: 1.1009e-06 lr: 1.1009e-06 eta: 0:01:39 time: 1.0074 data_time: 0.0088 memory: 16130 loss: 0.0283 tflops: 7.5676 tokens_per_sec: 125.0725 +2025/09/25 22:58:58 - mmengine - INFO - Iter(train) [7380/7452] base_lr: 1.0987e-06 lr: 1.0987e-06 eta: 0:01:27 time: 1.2364 data_time: 0.0086 memory: 16131 loss: 0.0093 tflops: 5.1858 tokens_per_sec: 85.7301 +2025/09/25 22:59:08 - mmengine - INFO - Iter(train) [7390/7452] base_lr: 1.0965e-06 lr: 1.0965e-06 eta: 0:01:14 time: 1.0541 data_time: 0.0085 memory: 16130 loss: 1.4141 tflops: 4.3022 tokens_per_sec: 71.1513 +2025/09/25 22:59:19 - mmengine - INFO - Iter(train) [7400/7452] base_lr: 1.0943e-06 lr: 1.0943e-06 eta: 0:01:02 time: 1.0012 data_time: 0.0084 memory: 16131 loss: 0.0094 tflops: 6.9490 tokens_per_sec: 114.8655 +2025/09/25 22:59:30 - mmengine - INFO - Iter(train) [7410/7452] base_lr: 1.0921e-06 lr: 1.0921e-06 eta: 0:00:50 time: 1.0106 data_time: 0.0084 memory: 16131 loss: 0.0894 tflops: 6.6446 tokens_per_sec: 109.8396 +2025/09/25 22:59:41 - mmengine - INFO - Iter(train) [7420/7452] base_lr: 1.0899e-06 lr: 1.0899e-06 eta: 0:00:38 time: 1.0328 data_time: 0.0081 memory: 16131 loss: 0.0203 tflops: 6.5601 tokens_per_sec: 108.4403 +2025/09/25 22:59:52 - mmengine - INFO - Iter(train) [7430/7452] base_lr: 1.0876e-06 lr: 1.0876e-06 eta: 0:00:26 time: 1.0209 data_time: 0.0082 memory: 16131 loss: 0.0039 tflops: 5.3906 tokens_per_sec: 89.1329 +2025/09/25 23:00:02 
- mmengine - INFO - Iter(train) [7440/7452] base_lr: 1.0854e-06 lr: 1.0854e-06 eta: 0:00:14 time: 1.0587 data_time: 0.0087 memory: 16130 loss: 0.1104 tflops: 5.7701 tokens_per_sec: 95.3963 +2025/09/25 23:00:13 - mmengine - INFO - Iter(train) [7450/7452] base_lr: 1.0832e-06 lr: 1.0832e-06 eta: 0:00:02 time: 1.0463 data_time: 0.0082 memory: 16131 loss: 2.1875 tflops: 5.7229 tokens_per_sec: 94.6180 +2025/09/25 23:00:15 - mmengine - INFO - Exp name: temp_config_stage_2a_20250925_202658 +2025/09/25 23:00:15 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 23:00:15 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:00:17 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and separated by fibrous stroma.<|im_end|> + +2025/09/25 23:00:17 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:00:35 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image reveals a well-demarcated area of neoplastic tissue, characterized by the presence of malignant cells. These cells exhibit marked pleomorphism, with significant variation in cell size and shape. The neoplastic cells are arranged in irregular clusters and cords, with areas of necrosis noted within the tumor. The tumor cells show marked atypia, with irregular nuclear contours, prominent nucleoli, and frequent mitotic figures. 
The tumor displays a high degree of cellular atypia, indicative of a poorly differentiated adenocarcinoma. The tumor cells infiltrate the surrounding stroma, with evidence of vascular invasion. The tumor also shows invasion into the adjacent lung parenchyma. The tumor cells express positivity for cytokeratin, indicating their epithelial origin. The absence of estrogen and progesterone receptors suggests that the tumor is hormone receptor negative. The Ki-67 proliferation index is elevated, indicating a high proliferative activity. The tumor cells show no evidence of neuroendocrine differentiation.<|im_end|> + +2025/09/25 23:00:35 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:00:36 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 23:00:36 - mmengine - INFO - Saving checkpoint at 7452 iterations +2025/09/25 23:01:23 - mmengine - INFO - after_train in EvaluateChatHook. +2025/09/25 23:01:23 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:01:27 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. 
The lobules are arranged in a manner that resembles the lobular structures of normal breast tissue.<|im_end|> + +2025/09/25 23:01:27 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:02:16 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image reveals a well-demarcated area of neoplastic tissue, characterized by the presence of neoplastic cells. These cells exhibit a high degree of atypia, with marked nuclear pleomorphism and hyperchromasia. The neoplastic cells are arranged in a diffuse pattern, with areas of solid and glandular formations. The tumor cells show a high mitotic rate, indicating rapid proliferation. The stroma surrounding the neoplastic cells is dense and fibrotic. The tumor cells are positive for estrogen receptor (ER) and progesterone receptor (PR), and negative for HER2/neu. The tumor cells are also positive for p53, indicating potential genetic instability. The tumor cells are negative for p16, suggesting a lack of p16 expression. The tumor cells are positive for Ki-67, indicating a high proliferation index. The tumor cells are negative for p53, indicating a lack of p53 overexpression. The tumor cells are positive for p16, indicating p16 expression. The tumor cells are negative for Ki-67, indicating a low proliferation index. The tumor cells are positive for p53, indicating p53 expression. The tumor cells are negative for p16, indicating a lack of p16 expression. The tumor cells are positive for Ki-67, indicating a high proliferation index. The tumor cells are negative for p53, indicating a lack of p53 overexpression. The tumor cells are positive for p16, indicating p16 expression. 
The tumor cells are negative for Ki-67, indicating a low proliferation index. The tumor cells are positive for p53, indicating p53 expression. The tumor cells are negative for p16, indicating a lack of p16 expression. The tumor cells are positive for Ki-67, indicating a high proliferation index. The tumor cells are negative for p53, indicating a lack of p53 overexpression. The tumor cells are positive for p16, indicating p16 expression. The tumor cells are negative for Ki-67, indicating a low proliferation index. The tumor cells are positive for p53, indicating p53 expression. The tumor cells are negative for p16, indicating a lack of p16 expression. The tumor cells are positive for Ki-67, indicating a high proliferation index. The tumor cells are negative for p53, indicating a lack of p53 overexpression. The tumor cells are positive for p16, indicating p16 expression. The tumor cells are negative for Ki-67, indicating a low proliferation index. The tumor cells are positive for p53, indicating p53 expression. The tumor cells are negative for p16, indicating a lack of p16 expression. The tumor cells are positive for Ki-67, indicating a high proliferation index. The + +2025/09/25 23:02:16 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:02:17 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/20250925_202658.json b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/20250925_202658.json new file mode 100644 index 0000000000000000000000000000000000000000..1125c760b5dce9c192552c9e18265acfa31e6221 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/20250925_202658.json @@ -0,0 +1,745 @@ +{"base_lr": 4.395161290322582e-08, "lr": 4.395161290322582e-08, "data_time": 0.008252859115600586, "loss": 2.15625, "time": 1.0588200092315674, "tflops": 3.5971065187335634, "tokens_per_sec": 59.500197815171994, "iter": 10, "memory": 15740, "step": 10} +{"base_lr": 7.056451612903229e-08, "lr": 7.056451612903229e-08, "data_time": 0.00792551040649414, "loss": 0.326171875, "time": 1.0317912101745605, "tflops": 7.565075109627917, "tokens_per_sec": 125.02529458266123, "iter": 20, "memory": 15740, "step": 20} +{"base_lr": 9.717741935483876e-08, "lr": 9.717741935483876e-08, "data_time": 0.0076558589935302734, "loss": 1.6875, "time": 1.0424790382385254, "tflops": 4.001761948860862, "tokens_per_sec": 66.18838122301524, "iter": 30, "memory": 15740, "step": 30} +{"base_lr": 1.237903225806452e-07, "lr": 1.237903225806452e-07, "data_time": 0.00793910026550293, "loss": 1.7734375, "time": 1.0455522537231445, "tflops": 4.2215280057542985, "tokens_per_sec": 69.8195616144309, "iter": 40, "memory": 15739, "step": 40} +{"base_lr": 1.504032258064516e-07, "lr": 1.504032258064516e-07, "data_time": 0.00835728645324707, "loss": 0.5546875, "time": 1.061682939529419, "tflops": 6.438827027209791, "tokens_per_sec": 106.4347893260673, "iter": 50, "memory": 15742, "step": 50} 
+{"base_lr": 1.7701612903225805e-07, "lr": 1.7701612903225805e-07, "data_time": 0.008299589157104492, "loss": 0.59375, "time": 1.0494346618652344, "tflops": 6.052167493901179, "tokens_per_sec": 100.05387073195773, "iter": 60, "memory": 15740, "step": 60} +{"base_lr": 2.036290322580645e-07, "lr": 2.036290322580645e-07, "data_time": 0.008434057235717773, "loss": 0.3125, "time": 1.0530741214752197, "tflops": 5.973731228731991, "tokens_per_sec": 98.75848041371559, "iter": 70, "memory": 16130, "step": 70} +{"base_lr": 2.3024193548387097e-07, "lr": 2.3024193548387097e-07, "data_time": 0.00812673568725586, "loss": 0.39453125, "time": 1.047574520111084, "tflops": 6.756894553634868, "tokens_per_sec": 111.68656525502522, "iter": 80, "memory": 16130, "step": 80} +{"base_lr": 2.568548387096774e-07, "lr": 2.568548387096774e-07, "data_time": 0.008737802505493164, "loss": 2.53125, "time": 1.1176350116729736, "tflops": 5.0867628493464006, "tokens_per_sec": 84.10616974069958, "iter": 90, "memory": 16131, "step": 90} +{"base_lr": 2.8346774193548383e-07, "lr": 2.8346774193548383e-07, "data_time": 0.008491754531860352, "loss": 0.28125, "time": 1.0414159297943115, "tflops": 6.505957115293689, "tokens_per_sec": 107.54588709048592, "iter": 100, "memory": 16130, "step": 100} +{"base_lr": 3.100806451612901e-07, "lr": 3.100806451612901e-07, "data_time": 0.008758068084716797, "loss": 0.9375, "time": 1.0598418712615967, "tflops": 5.64984560719282, "tokens_per_sec": 93.41016116117457, "iter": 110, "memory": 16131, "step": 110} +{"base_lr": 3.366935483870966e-07, "lr": 3.366935483870966e-07, "data_time": 0.008307933807373047, "loss": 1.265625, "time": 1.166724443435669, "tflops": 3.2644283564997063, "tokens_per_sec": 53.99732589336096, "iter": 120, "memory": 16131, "step": 120} +{"base_lr": 3.63306451612903e-07, "lr": 3.63306451612903e-07, "data_time": 0.008579492568969727, "loss": 1.75, "time": 1.090517282485962, "tflops": 2.383172245238646, "tokens_per_sec": 39.43082855315876, "iter": 130, 
"memory": 16130, "step": 130} +{"base_lr": 3.8991935483870945e-07, "lr": 3.8991935483870945e-07, "data_time": 0.008089303970336914, "loss": 0.19921875, "time": 1.2434301376342773, "tflops": 6.033743470091637, "tokens_per_sec": 99.72413909463376, "iter": 140, "memory": 16131, "step": 140} +{"base_lr": 4.1653225806451577e-07, "lr": 4.1653225806451577e-07, "data_time": 0.008330821990966797, "loss": 1.8515625, "time": 1.0202295780181885, "tflops": 5.216314970703492, "tokens_per_sec": 86.2550958097638, "iter": 150, "memory": 16130, "step": 150} +{"base_lr": 4.4314516129032225e-07, "lr": 4.4314516129032225e-07, "data_time": 0.007828474044799805, "loss": 1.7890625, "time": 1.039480209350586, "tflops": 3.838662238398019, "tokens_per_sec": 63.49327231652628, "iter": 160, "memory": 16130, "step": 160} +{"base_lr": 4.697580645161287e-07, "lr": 4.697580645161287e-07, "data_time": 0.00848531723022461, "loss": 2.4375, "time": 1.0166044235229492, "tflops": 4.163142747116224, "tokens_per_sec": 68.85667461228682, "iter": 170, "memory": 16131, "step": 170} +{"base_lr": 4.963709677419353e-07, "lr": 4.963709677419353e-07, "data_time": 0.00827169418334961, "loss": 0.361328125, "time": 1.0777912139892578, "tflops": 6.736124452899487, "tokens_per_sec": 111.33881817029237, "iter": 180, "memory": 16131, "step": 180} +{"base_lr": 5.229838709677418e-07, "lr": 5.229838709677418e-07, "data_time": 0.008142948150634766, "loss": 0.75, "time": 1.0406582355499268, "tflops": 6.685353787141135, "tokens_per_sec": 110.50698113114795, "iter": 190, "memory": 16131, "step": 190} +{"base_lr": 5.49596774193548e-07, "lr": 5.49596774193548e-07, "data_time": 0.008439064025878906, "loss": 0.84375, "time": 1.0235364437103271, "tflops": 6.205303569348335, "tokens_per_sec": 102.58550210412795, "iter": 200, "memory": 16131, "step": 200} +{"base_lr": 5.762096774193546e-07, "lr": 5.762096774193546e-07, "data_time": 0.008361339569091797, "loss": 0.306640625, "time": 1.2279424667358398, "tflops": 5.172354991841126, 
"tokens_per_sec": 85.50889218696801, "iter": 210, "memory": 16130, "step": 210} +{"base_lr": 6.028225806451608e-07, "lr": 6.028225806451608e-07, "data_time": 0.008082389831542969, "loss": 0.4140625, "time": 1.0067017078399658, "tflops": 6.971040840936602, "tokens_per_sec": 115.2277771026938, "iter": 220, "memory": 16132, "step": 220} +{"base_lr": 6.294354838709672e-07, "lr": 6.294354838709672e-07, "data_time": 0.008601903915405273, "loss": 1.3046875, "time": 1.0299232006072998, "tflops": 4.050547599323962, "tokens_per_sec": 66.99528659927923, "iter": 230, "memory": 16131, "step": 230} +{"base_lr": 6.560483870967735e-07, "lr": 6.560483870967735e-07, "data_time": 0.008841991424560547, "loss": 2.09375, "time": 1.0509626865386963, "tflops": 6.677457544288305, "tokens_per_sec": 110.37499378967581, "iter": 240, "memory": 16131, "step": 240} +{"base_lr": 6.826612903225798e-07, "lr": 6.826612903225798e-07, "data_time": 0.008244752883911133, "loss": 1.40625, "time": 1.02738356590271, "tflops": 3.5893893913674124, "tokens_per_sec": 59.37412474214828, "iter": 250, "memory": 16130, "step": 250} +{"base_lr": 7.092741935483862e-07, "lr": 7.092741935483862e-07, "data_time": 0.007828235626220703, "loss": 1.984375, "time": 1.2792456150054932, "tflops": 4.870222509493908, "tokens_per_sec": 80.51620329335832, "iter": 260, "memory": 16131, "step": 260} +{"base_lr": 7.358870967741926e-07, "lr": 7.358870967741926e-07, "data_time": 0.008242130279541016, "loss": 0.294921875, "time": 1.1922528743743896, "tflops": 6.699435295198208, "tokens_per_sec": 110.71476767808471, "iter": 270, "memory": 16131, "step": 270} +{"base_lr": 7.624999999999988e-07, "lr": 7.624999999999988e-07, "data_time": 0.00824737548828125, "loss": 2.5, "time": 1.2521777153015137, "tflops": 4.588566552385814, "tokens_per_sec": 75.86782518090808, "iter": 280, "memory": 16131, "step": 280} +{"base_lr": 7.891129032258054e-07, "lr": 7.891129032258054e-07, "data_time": 0.007684469223022461, "loss": 0.75390625, "time": 
1.020216941833496, "tflops": 6.403620111648591, "tokens_per_sec": 105.85983781625949, "iter": 290, "memory": 16131, "step": 290} +{"base_lr": 8.157258064516119e-07, "lr": 8.157258064516119e-07, "data_time": 0.00827336311340332, "loss": 2.78125, "time": 1.0804007053375244, "tflops": 4.8137231205253945, "tokens_per_sec": 79.60009612642136, "iter": 300, "memory": 16130, "step": 300} +{"base_lr": 8.423387096774183e-07, "lr": 8.423387096774183e-07, "data_time": 0.007940530776977539, "loss": 0.52734375, "time": 1.2500593662261963, "tflops": 4.935478734400104, "tokens_per_sec": 81.59612475673548, "iter": 310, "memory": 16130, "step": 310} +{"base_lr": 8.689516129032248e-07, "lr": 8.689516129032248e-07, "data_time": 0.008423566818237305, "loss": 0.146484375, "time": 1.318004846572876, "tflops": 7.256421607126407, "tokens_per_sec": 119.8781631271823, "iter": 320, "memory": 16131, "step": 320} +{"base_lr": 8.955645161290312e-07, "lr": 8.955645161290312e-07, "data_time": 0.008231878280639648, "loss": 0.267578125, "time": 1.0330135822296143, "tflops": 6.910793959901945, "tokens_per_sec": 114.22889498238676, "iter": 330, "memory": 16130, "step": 330} +{"base_lr": 9.221774193548377e-07, "lr": 9.221774193548377e-07, "data_time": 0.008239507675170898, "loss": 0.5859375, "time": 1.040961742401123, "tflops": 6.217811918098017, "tokens_per_sec": 102.78956050112545, "iter": 340, "memory": 16132, "step": 340} +{"base_lr": 9.487903225806442e-07, "lr": 9.487903225806442e-07, "data_time": 0.00878286361694336, "loss": 0.203125, "time": 1.02880859375, "tflops": 6.350143035991382, "tokens_per_sec": 104.97579496904841, "iter": 350, "memory": 16131, "step": 350} +{"base_lr": 9.754032258064507e-07, "lr": 9.754032258064507e-07, "data_time": 0.008882761001586914, "loss": 0.140625, "time": 1.0657086372375488, "tflops": 6.9262213361040414, "tokens_per_sec": 114.47781854909697, "iter": 360, "memory": 16131, "step": 360} +{"base_lr": 1.0020161290322574e-06, "lr": 1.0020161290322574e-06, "data_time": 
0.00839543342590332, "loss": 1.859375, "time": 1.0951733589172363, "tflops": 7.95758713736447, "tokens_per_sec": 131.48603262431146, "iter": 370, "memory": 16131, "step": 370} +{"base_lr": 1.028629032258064e-06, "lr": 1.028629032258064e-06, "data_time": 0.00841975212097168, "loss": 2.09375, "time": 1.0826020240783691, "tflops": 6.706190816384188, "tokens_per_sec": 110.84405657013846, "iter": 380, "memory": 16131, "step": 380} +{"base_lr": 1.0552419354838708e-06, "lr": 1.0552419354838708e-06, "data_time": 0.008339166641235352, "loss": 0.306640625, "time": 1.0399360656738281, "tflops": 7.447541383565911, "tokens_per_sec": 123.08448973441374, "iter": 390, "memory": 16131, "step": 390} +{"base_lr": 1.0818548387096769e-06, "lr": 1.0818548387096769e-06, "data_time": 0.008753538131713867, "loss": 0.2158203125, "time": 1.0642521381378174, "tflops": 7.391284868712155, "tokens_per_sec": 122.1515046494022, "iter": 400, "memory": 16131, "step": 400} +{"base_lr": 1.1084677419354831e-06, "lr": 1.1084677419354831e-06, "data_time": 0.009238243103027344, "loss": 2.359375, "time": 1.0673494338989258, "tflops": 6.631708740041046, "tokens_per_sec": 109.61733457102284, "iter": 410, "memory": 16130, "step": 410} +{"base_lr": 1.1350806451612896e-06, "lr": 1.1350806451612896e-06, "data_time": 0.007996797561645508, "loss": 2.5625, "time": 1.0594053268432617, "tflops": 4.39485139971969, "tokens_per_sec": 72.68228509796742, "iter": 420, "memory": 16131, "step": 420} +{"base_lr": 1.1616935483870963e-06, "lr": 1.1616935483870963e-06, "data_time": 0.008247852325439453, "loss": 0.259765625, "time": 1.0520517826080322, "tflops": 6.325013132849636, "tokens_per_sec": 104.55759100298833, "iter": 430, "memory": 16131, "step": 430} +{"base_lr": 1.188306451612903e-06, "lr": 1.188306451612903e-06, "data_time": 0.008158683776855469, "loss": 0.265625, "time": 1.0134432315826416, "tflops": 6.685532220531496, "tokens_per_sec": 110.51433026493739, "iter": 440, "memory": 16131, "step": 440} +{"base_lr": 
1.2149193548387095e-06, "lr": 1.2149193548387095e-06, "data_time": 0.008710145950317383, "loss": 0.1796875, "time": 1.0389196872711182, "tflops": 7.979947673722839, "tokens_per_sec": 131.8677484683341, "iter": 450, "memory": 16131, "step": 450} +{"base_lr": 1.2415322580645158e-06, "lr": 1.2415322580645158e-06, "data_time": 0.008257865905761719, "loss": 1.359375, "time": 1.3634393215179443, "tflops": 3.459235874260699, "tokens_per_sec": 57.20826645450112, "iter": 460, "memory": 16131, "step": 460} +{"base_lr": 1.2681451612903222e-06, "lr": 1.2681451612903222e-06, "data_time": 0.008691549301147461, "loss": 2.609375, "time": 1.0664176940917969, "tflops": 4.309194591214387, "tokens_per_sec": 71.2666344725772, "iter": 470, "memory": 16130, "step": 470} +{"base_lr": 1.2947580645161285e-06, "lr": 1.2947580645161285e-06, "data_time": 0.008431434631347656, "loss": 0.30859375, "time": 1.041290521621704, "tflops": 6.506740662417137, "tokens_per_sec": 107.55883941540525, "iter": 480, "memory": 16131, "step": 480} +{"base_lr": 1.3213709677419352e-06, "lr": 1.3213709677419352e-06, "data_time": 0.007923126220703125, "loss": 0.314453125, "time": 1.4173884391784668, "tflops": 6.020252748314934, "tokens_per_sec": 99.47872869742439, "iter": 490, "memory": 16131, "step": 490} +{"base_lr": 1.347983870967742e-06, "lr": 1.347983870967742e-06, "data_time": 0.008606433868408203, "loss": 2.4375, "time": 1.1120550632476807, "tflops": 6.147171153023487, "tokens_per_sec": 101.61367340020881, "iter": 500, "memory": 16131, "step": 500} +{"base_lr": 1.3745967741935486e-06, "lr": 1.3745967741935486e-06, "data_time": 0.008076906204223633, "loss": 1.6484375, "time": 1.1908760070800781, "tflops": 5.079040520117054, "tokens_per_sec": 83.97179841174827, "iter": 510, "memory": 16131, "step": 510} +{"base_lr": 1.4012096774193555e-06, "lr": 1.4012096774193555e-06, "data_time": 0.00827336311340332, "loss": 1.90625, "time": 1.0396003723144531, "tflops": 8.149676055703795, "tokens_per_sec": 
134.6671314557002, "iter": 520, "memory": 16131, "step": 520} +{"base_lr": 1.4278225806451622e-06, "lr": 1.4278225806451622e-06, "data_time": 0.008142948150634766, "loss": 2.34375, "time": 1.044724702835083, "tflops": 4.398672058662919, "tokens_per_sec": 72.74643721325346, "iter": 530, "memory": 16131, "step": 530} +{"base_lr": 1.4544354838709687e-06, "lr": 1.4544354838709687e-06, "data_time": 0.008404731750488281, "loss": 2.546875, "time": 1.106694221496582, "tflops": 7.10782668990125, "tokens_per_sec": 117.46695471499217, "iter": 540, "memory": 16131, "step": 540} +{"base_lr": 1.4810483870967756e-06, "lr": 1.4810483870967756e-06, "data_time": 0.008424758911132812, "loss": 1.9609375, "time": 1.26153564453125, "tflops": 3.354855132967117, "tokens_per_sec": 55.487928782190274, "iter": 550, "memory": 16130, "step": 550} +{"base_lr": 1.5076612903225823e-06, "lr": 1.5076612903225823e-06, "data_time": 0.008009195327758789, "loss": 0.26171875, "time": 1.0370826721191406, "tflops": 7.584921565644231, "tokens_per_sec": 125.35162672637941, "iter": 560, "memory": 16130, "step": 560} +{"base_lr": 1.5342741935483888e-06, "lr": 1.5342741935483888e-06, "data_time": 0.008609294891357422, "loss": 2.359375, "time": 1.0406486988067627, "tflops": 7.500684920256563, "tokens_per_sec": 123.96114091892018, "iter": 570, "memory": 16131, "step": 570} +{"base_lr": 1.5608870967741953e-06, "lr": 1.5608870967741953e-06, "data_time": 0.00798654556274414, "loss": 0.2294921875, "time": 1.0319414138793945, "tflops": 7.15286139506319, "tokens_per_sec": 118.22376576713317, "iter": 580, "memory": 16130, "step": 580} +{"base_lr": 1.5875000000000022e-06, "lr": 1.5875000000000022e-06, "data_time": 0.010207414627075195, "loss": 1.3515625, "time": 1.0612356662750244, "tflops": 3.4748923255287996, "tokens_per_sec": 57.48016386789442, "iter": 590, "memory": 16131, "step": 590} +{"base_lr": 1.6141129032258084e-06, "lr": 1.6141129032258084e-06, "data_time": 0.008874893188476562, "loss": 1.90625, "time": 
1.070939064025879, "tflops": 6.609480228367451, "tokens_per_sec": 109.2499133984933, "iter": 600, "memory": 16131, "step": 600} +{"base_lr": 1.6407258064516147e-06, "lr": 1.6407258064516147e-06, "data_time": 0.007974863052368164, "loss": 3.015625, "time": 1.0254802703857422, "tflops": 4.304157035523853, "tokens_per_sec": 71.18615746012286, "iter": 610, "memory": 16130, "step": 610} +{"base_lr": 1.6673387096774214e-06, "lr": 1.6673387096774214e-06, "data_time": 0.008660078048706055, "loss": 2.5625, "time": 1.0109422206878662, "tflops": 7.00173602864142, "tokens_per_sec": 115.73361721926601, "iter": 620, "memory": 16131, "step": 620} +{"base_lr": 1.6939516129032277e-06, "lr": 1.6939516129032277e-06, "data_time": 0.008180856704711914, "loss": 0.427734375, "time": 1.0164997577667236, "tflops": 6.844240171230571, "tokens_per_sec": 113.13332750078057, "iter": 630, "memory": 16131, "step": 630} +{"base_lr": 1.7205645161290337e-06, "lr": 1.7205645161290337e-06, "data_time": 0.008471250534057617, "loss": 2.484375, "time": 1.3150043487548828, "tflops": 4.047012336051175, "tokens_per_sec": 66.91993078445424, "iter": 640, "memory": 16130, "step": 640} +{"base_lr": 1.74717741935484e-06, "lr": 1.74717741935484e-06, "data_time": 0.008368968963623047, "loss": 1.078125, "time": 1.0985722541809082, "tflops": 4.072878147831967, "tokens_per_sec": 67.36015743917253, "iter": 650, "memory": 16131, "step": 650} +{"base_lr": 1.7737903225806463e-06, "lr": 1.7737903225806463e-06, "data_time": 0.008448600769042969, "loss": 1.9296875, "time": 1.1227614879608154, "tflops": 3.9312250802158903, "tokens_per_sec": 65.01826147645944, "iter": 660, "memory": 16130, "step": 660} +{"base_lr": 1.8004032258064532e-06, "lr": 1.8004032258064532e-06, "data_time": 0.008531808853149414, "loss": 1.1796875, "time": 1.0657172203063965, "tflops": 3.9145027106803205, "tokens_per_sec": 64.74513002623489, "iter": 670, "memory": 16130, "step": 670} +{"base_lr": 1.8270161290322595e-06, "lr": 1.8270161290322595e-06, 
"data_time": 0.008015155792236328, "loss": 0.0947265625, "time": 1.3633790016174316, "tflops": 6.747996586591716, "tokens_per_sec": 111.48770798110046, "iter": 680, "memory": 16131, "step": 680} +{"base_lr": 1.8536290322580662e-06, "lr": 1.8536290322580662e-06, "data_time": 0.008182048797607422, "loss": 0.4296875, "time": 1.232914924621582, "tflops": 4.31646881313008, "tokens_per_sec": 71.37556553380065, "iter": 690, "memory": 16131, "step": 690} +{"base_lr": 1.8802419354838724e-06, "lr": 1.8802419354838724e-06, "data_time": 0.008540868759155273, "loss": 1.6953125, "time": 1.2415738105773926, "tflops": 2.6290823986625154, "tokens_per_sec": 43.493185455356745, "iter": 700, "memory": 16130, "step": 700} +{"base_lr": 1.9068548387096791e-06, "lr": 1.9068548387096791e-06, "data_time": 0.008160114288330078, "loss": 0.287109375, "time": 1.068218469619751, "tflops": 6.17257697644279, "tokens_per_sec": 102.03905202902754, "iter": 710, "memory": 16131, "step": 710} +{"base_lr": 1.933467741935486e-06, "lr": 1.933467741935486e-06, "data_time": 0.008492708206176758, "loss": 2.65625, "time": 1.2647275924682617, "tflops": 3.8728146407219652, "tokens_per_sec": 64.04541221549148, "iter": 720, "memory": 16131, "step": 720} +{"base_lr": 1.960080645161292e-06, "lr": 1.960080645161292e-06, "data_time": 0.008059501647949219, "loss": 0.1962890625, "time": 1.3936829566955566, "tflops": 5.035405424368665, "tokens_per_sec": 83.2327032791981, "iter": 730, "memory": 16130, "step": 730} +{"base_lr": 1.9866935483870986e-06, "lr": 1.9866935483870986e-06, "data_time": 0.008306026458740234, "loss": 0.1865234375, "time": 1.0434024333953857, "tflops": 8.178087738039922, "tokens_per_sec": 135.13481997643999, "iter": 740, "memory": 16131, "step": 740} +{"base_lr": 1.9999996061557236e-06, "lr": 1.9999996061557236e-06, "data_time": 0.008544683456420898, "loss": 0.162109375, "time": 1.3108081817626953, "tflops": 6.232245523558006, "tokens_per_sec": 102.98989728486222, "iter": 750, "memory": 16131, 
"step": 750} +{"base_lr": 1.9999951754111564e-06, "lr": 1.9999951754111564e-06, "data_time": 0.008687496185302734, "loss": 2.171875, "time": 1.0354118347167969, "tflops": 4.613612563288863, "tokens_per_sec": 76.29814277864766, "iter": 760, "memory": 16130, "step": 760} +{"base_lr": 1.999985821638561e-06, "lr": 1.999985821638561e-06, "data_time": 0.008340835571289062, "loss": 2.0, "time": 1.0585877895355225, "tflops": 5.427703624463322, "tokens_per_sec": 89.74220271480128, "iter": 770, "memory": 16130, "step": 770} +{"base_lr": 1.9999715448839856e-06, "lr": 1.9999715448839856e-06, "data_time": 0.008420944213867188, "loss": 2.25, "time": 1.0833537578582764, "tflops": 4.968262374229149, "tokens_per_sec": 82.15229730302072, "iter": 780, "memory": 16131, "step": 780} +{"base_lr": 1.999952345217715e-06, "lr": 1.999952345217715e-06, "data_time": 0.008046150207519531, "loss": 0.439453125, "time": 1.01377272605896, "tflops": 5.4884451704443595, "tokens_per_sec": 90.75012341036152, "iter": 790, "memory": 16131, "step": 790} +{"base_lr": 1.999928222734272e-06, "lr": 1.999928222734272e-06, "data_time": 0.008173465728759766, "loss": 1.78125, "time": 1.2392878532409668, "tflops": 3.219763202521218, "tokens_per_sec": 53.25639223150985, "iter": 800, "memory": 16131, "step": 800} +{"base_lr": 1.99989917755241e-06, "lr": 1.99989917755241e-06, "data_time": 0.007458686828613281, "loss": 0.345703125, "time": 1.0087604522705078, "tflops": 6.716567211943395, "tokens_per_sec": 111.02735019776053, "iter": 810, "memory": 16131, "step": 810} +{"base_lr": 1.9998652098151227e-06, "lr": 1.9998652098151227e-06, "data_time": 0.008054971694946289, "loss": 2.296875, "time": 1.05586576461792, "tflops": 7.277778702035496, "tokens_per_sec": 120.28044118452544, "iter": 820, "memory": 16131, "step": 820} +{"base_lr": 1.9998263196896337e-06, "lr": 1.9998263196896337e-06, "data_time": 0.008306264877319336, "loss": 2.421875, "time": 1.0630297660827637, "tflops": 7.456804415404667, "tokens_per_sec": 
123.23267342043324, "iter": 830, "memory": 16131, "step": 830} +{"base_lr": 1.9997825073674015e-06, "lr": 1.9997825073674015e-06, "data_time": 0.008565902709960938, "loss": 2.640625, "time": 1.3298759460449219, "tflops": 3.910703002173417, "tokens_per_sec": 64.66768592642124, "iter": 840, "memory": 16131, "step": 840} +{"base_lr": 1.999733773064116e-06, "lr": 1.999733773064116e-06, "data_time": 0.008158683776855469, "loss": 2.546875, "time": 1.0212271213531494, "tflops": 4.677695048382285, "tokens_per_sec": 77.35791416824675, "iter": 850, "memory": 16131, "step": 850} +{"base_lr": 1.9996801170196976e-06, "lr": 1.9996801170196976e-06, "data_time": 0.008026123046875, "loss": 0.2197265625, "time": 1.0231528282165527, "tflops": 5.734058967840276, "tokens_per_sec": 94.80499620861616, "iter": 860, "memory": 16131, "step": 860} +{"base_lr": 1.9996215394983004e-06, "lr": 1.9996215394983004e-06, "data_time": 0.008556842803955078, "loss": 1.609375, "time": 1.3322386741638184, "tflops": 3.9492124326223377, "tokens_per_sec": 65.3036138997694, "iter": 870, "memory": 16131, "step": 870} +{"base_lr": 1.9995580407883036e-06, "lr": 1.9995580407883036e-06, "data_time": 0.008893251419067383, "loss": 0.0947265625, "time": 1.0334019660949707, "tflops": 6.321917260963756, "tokens_per_sec": 104.50918765715815, "iter": 880, "memory": 16130, "step": 880} +{"base_lr": 1.999489621202313e-06, "lr": 1.999489621202313e-06, "data_time": 0.008881330490112305, "loss": 0.40625, "time": 1.0321481227874756, "tflops": 7.3275753750994905, "tokens_per_sec": 121.10664859060834, "iter": 890, "memory": 16130, "step": 890} +{"base_lr": 1.9994162810771625e-06, "lr": 1.9994162810771625e-06, "data_time": 0.008353233337402344, "loss": 1.2890625, "time": 1.0287361145019531, "tflops": 3.7611152693141823, "tokens_per_sec": 62.21226133479566, "iter": 900, "memory": 16131, "step": 900} +{"base_lr": 1.999338020773911e-06, "lr": 1.999338020773911e-06, "data_time": 0.008432626724243164, "loss": 1.8515625, "time": 
1.036409616470337, "tflops": 4.025197066306239, "tokens_per_sec": 66.57599360658602, "iter": 910, "memory": 16131, "step": 910} +{"base_lr": 1.999254840677837e-06, "lr": 1.999254840677837e-06, "data_time": 0.00848698616027832, "loss": 2.328125, "time": 1.0552129745483398, "tflops": 6.478306247414664, "tokens_per_sec": 107.08738683606502, "iter": 920, "memory": 16130, "step": 920} +{"base_lr": 1.99916674119844e-06, "lr": 1.99916674119844e-06, "data_time": 0.008119344711303711, "loss": 0.259765625, "time": 1.0063934326171875, "tflops": 7.575317188156125, "tokens_per_sec": 125.19954514429223, "iter": 930, "memory": 16131, "step": 930} +{"base_lr": 1.999073722769438e-06, "lr": 1.999073722769438e-06, "data_time": 0.008267879486083984, "loss": 1.703125, "time": 1.0351288318634033, "tflops": 3.8547988466451084, "tokens_per_sec": 63.76017937895257, "iter": 940, "memory": 16131, "step": 940} +{"base_lr": 1.9989757858487664e-06, "lr": 1.9989757858487664e-06, "data_time": 0.008328914642333984, "loss": 2.140625, "time": 1.4201805591583252, "tflops": 3.193168158480085, "tokens_per_sec": 52.810186364187516, "iter": 950, "memory": 16131, "step": 950} +{"base_lr": 1.9988729309185732e-06, "lr": 1.9988729309185732e-06, "data_time": 0.011367082595825195, "loss": 1.8984375, "time": 1.3723738193511963, "tflops": 3.8778347024195456, "tokens_per_sec": 64.1224706848013, "iter": 960, "memory": 16131, "step": 960} +{"base_lr": 1.998765158485219e-06, "lr": 1.998765158485219e-06, "data_time": 0.008350133895874023, "loss": 0.2138671875, "time": 1.039778709411621, "tflops": 6.924107043294889, "tokens_per_sec": 114.44742897959894, "iter": 970, "memory": 16131, "step": 970} +{"base_lr": 1.9986524690792733e-06, "lr": 1.9986524690792733e-06, "data_time": 0.008599042892456055, "loss": 1.8359375, "time": 1.041391134262085, "tflops": 3.5992079140219593, "tokens_per_sec": 59.53574786660037, "iter": 980, "memory": 16131, "step": 980} +{"base_lr": 1.9985348632555117e-06, "lr": 1.9985348632555117e-06, 
"data_time": 0.008408546447753906, "loss": 0.1962890625, "time": 1.0268840789794922, "tflops": 6.893037602160027, "tokens_per_sec": 113.93691108363426, "iter": 990, "memory": 16131, "step": 990} +{"base_lr": 1.9984123415929133e-06, "lr": 1.9984123415929133e-06, "data_time": 0.008064746856689453, "loss": 0.1474609375, "time": 1.015733003616333, "tflops": 7.326664938594112, "tokens_per_sec": 121.09481484008073, "iter": 1000, "memory": 16130, "step": 1000} +{"base_lr": 1.9982849046946588e-06, "lr": 1.9982849046946588e-06, "data_time": 0.008089065551757812, "loss": 2.375, "time": 1.272737979888916, "tflops": 4.895124438664803, "tokens_per_sec": 80.92789060078874, "iter": 1010, "memory": 16131, "step": 1010} +{"base_lr": 1.998152553188127e-06, "lr": 1.998152553188127e-06, "data_time": 0.008710861206054688, "loss": 3.0, "time": 1.0319976806640625, "tflops": 7.03503107390829, "tokens_per_sec": 116.27933109565417, "iter": 1020, "memory": 16131, "step": 1020} +{"base_lr": 1.9980152877248906e-06, "lr": 1.9980152877248906e-06, "data_time": 0.008044242858886719, "loss": 1.71875, "time": 1.3313405513763428, "tflops": 6.454898882018872, "tokens_per_sec": 106.65941171332415, "iter": 1030, "memory": 16131, "step": 1030} +{"base_lr": 1.9978731089807145e-06, "lr": 1.9978731089807145e-06, "data_time": 0.00813603401184082, "loss": 0.21484375, "time": 1.2392292022705078, "tflops": 4.538780498608429, "tokens_per_sec": 75.04664982840215, "iter": 1040, "memory": 16131, "step": 1040} +{"base_lr": 1.997726017655552e-06, "lr": 1.997726017655552e-06, "data_time": 0.007851123809814453, "loss": 1.953125, "time": 1.0009186267852783, "tflops": 7.556201838588546, "tokens_per_sec": 124.8852770392999, "iter": 1050, "memory": 16131, "step": 1050} +{"base_lr": 1.997574014473542e-06, "lr": 1.997574014473542e-06, "data_time": 0.008387088775634766, "loss": 0.1591796875, "time": 1.0371522903442383, "tflops": 7.993546192596577, "tokens_per_sec": 132.09246248147087, "iter": 1060, "memory": 16131, "step": 
1060} +{"base_lr": 1.997417100183004e-06, "lr": 1.997417100183004e-06, "data_time": 0.008855342864990234, "loss": 0.12109375, "time": 1.3419909477233887, "tflops": 5.048772787864959, "tokens_per_sec": 83.45808903548729, "iter": 1070, "memory": 16131, "step": 1070} +{"base_lr": 1.9972552755564346e-06, "lr": 1.9972552755564346e-06, "data_time": 0.008716821670532227, "loss": 1.71875, "time": 1.0529735088348389, "tflops": 5.9167782827357955, "tokens_per_sec": 97.81822537385216, "iter": 1080, "memory": 16131, "step": 1080} +{"base_lr": 1.9970885413905052e-06, "lr": 1.9970885413905052e-06, "data_time": 0.008357524871826172, "loss": 0.23046875, "time": 1.0397582054138184, "tflops": 6.749414126720182, "tokens_per_sec": 111.56439968052095, "iter": 1090, "memory": 16130, "step": 1090} +{"base_lr": 1.996916898506057e-06, "lr": 1.996916898506057e-06, "data_time": 0.008685111999511719, "loss": 0.0751953125, "time": 1.0236825942993164, "tflops": 7.210568922837958, "tokens_per_sec": 119.17756605345681, "iter": 1100, "memory": 16130, "step": 1100} +{"base_lr": 1.996740347748096e-06, "lr": 1.996740347748096e-06, "data_time": 0.008006811141967773, "loss": 0.37890625, "time": 1.0310401916503906, "tflops": 6.0426459024814845, "tokens_per_sec": 99.89911240514061, "iter": 1110, "memory": 16131, "step": 1110} +{"base_lr": 1.9965588899857913e-06, "lr": 1.9965588899857913e-06, "data_time": 0.008271932601928711, "loss": 2.265625, "time": 1.0251178741455078, "tflops": 4.718990480921642, "tokens_per_sec": 78.03980597509955, "iter": 1120, "memory": 16131, "step": 1120} +{"base_lr": 1.996372526112469e-06, "lr": 1.996372526112469e-06, "data_time": 0.008522510528564453, "loss": 2.03125, "time": 1.0514943599700928, "tflops": 6.501216806872774, "tokens_per_sec": 107.46610186583078, "iter": 1130, "memory": 16130, "step": 1130} +{"base_lr": 1.9961812570456086e-06, "lr": 1.9961812570456086e-06, "data_time": 0.00832819938659668, "loss": 0.12158203125, "time": 1.0147068500518799, "tflops": 
6.856333408804165, "tokens_per_sec": 113.33322525023551, "iter": 1140, "memory": 16132, "step": 1140} +{"base_lr": 1.9959850837268362e-06, "lr": 1.9959850837268362e-06, "data_time": 0.008744478225708008, "loss": 0.07080078125, "time": 1.4088335037231445, "tflops": 5.1102838385661356, "tokens_per_sec": 84.46704290140212, "iter": 1150, "memory": 16130, "step": 1150} +{"base_lr": 1.995784007121924e-06, "lr": 1.995784007121924e-06, "data_time": 0.008178234100341797, "loss": 1.5078125, "time": 1.0357067584991455, "tflops": 3.969500184134815, "tokens_per_sec": 65.65564957640511, "iter": 1160, "memory": 16131, "step": 1160} +{"base_lr": 1.995578028220783e-06, "lr": 1.995578028220783e-06, "data_time": 0.008498668670654297, "loss": 1.453125, "time": 1.0115511417388916, "tflops": 5.261067485151875, "tokens_per_sec": 86.99510718621497, "iter": 1170, "memory": 16130, "step": 1170} +{"base_lr": 1.9953671480374565e-06, "lr": 1.9953671480374565e-06, "data_time": 0.009272098541259766, "loss": 0.078125, "time": 1.1071531772613525, "tflops": 5.408414177592238, "tokens_per_sec": 89.41852133306105, "iter": 1180, "memory": 16131, "step": 1180} +{"base_lr": 1.99515136761012e-06, "lr": 1.99515136761012e-06, "data_time": 0.008645057678222656, "loss": 2.546875, "time": 1.114736557006836, "tflops": 4.17670789953224, "tokens_per_sec": 69.07461634404686, "iter": 1190, "memory": 16132, "step": 1190} +{"base_lr": 1.99493068800107e-06, "lr": 1.99493068800107e-06, "data_time": 0.00860905647277832, "loss": 2.015625, "time": 1.1563637256622314, "tflops": 4.235739523765089, "tokens_per_sec": 70.04716440196403, "iter": 1200, "memory": 16130, "step": 1200} +{"base_lr": 1.9947051102967252e-06, "lr": 1.9947051102967252e-06, "data_time": 0.008734703063964844, "loss": 0.111328125, "time": 1.1182172298431396, "tflops": 6.980377152228251, "tokens_per_sec": 115.36220025690392, "iter": 1210, "memory": 16131, "step": 1210} +{"base_lr": 1.9944746356076162e-06, "lr": 1.9944746356076162e-06, "data_time": 
0.008022308349609375, "loss": 0.1171875, "time": 1.1122207641601562, "tflops": 5.982842216094275, "tokens_per_sec": 98.90122855507258, "iter": 1220, "memory": 16130, "step": 1220} +{"base_lr": 1.9942392650683813e-06, "lr": 1.9942392650683813e-06, "data_time": 0.008499622344970703, "loss": 0.039306640625, "time": 1.0257453918457031, "tflops": 9.442225236033488, "tokens_per_sec": 155.98412751525368, "iter": 1230, "memory": 16131, "step": 1230} +{"base_lr": 1.9939989998377628e-06, "lr": 1.9939989998377628e-06, "data_time": 0.008163213729858398, "loss": 2.078125, "time": 1.1214323043823242, "tflops": 3.666060048837964, "tokens_per_sec": 60.63674083064087, "iter": 1240, "memory": 16130, "step": 1240} +{"base_lr": 1.9937538410985985e-06, "lr": 1.9937538410985985e-06, "data_time": 0.008352041244506836, "loss": 2.28125, "time": 1.0246052742004395, "tflops": 6.376193731842515, "tokens_per_sec": 105.40644550573236, "iter": 1250, "memory": 16132, "step": 1250} +{"base_lr": 1.993503790057816e-06, "lr": 1.993503790057816e-06, "data_time": 0.008065462112426758, "loss": 2.015625, "time": 1.104888916015625, "tflops": 4.76183031521409, "tokens_per_sec": 78.74094738288689, "iter": 1260, "memory": 16130, "step": 1260} +{"base_lr": 1.993248847946431e-06, "lr": 1.993248847946431e-06, "data_time": 0.00820612907409668, "loss": 0.2099609375, "time": 1.0216822624206543, "tflops": 6.453729279554388, "tokens_per_sec": 106.68678904304501, "iter": 1270, "memory": 16131, "step": 1270} +{"base_lr": 1.9929890160195366e-06, "lr": 1.9929890160195366e-06, "data_time": 0.00818490982055664, "loss": 0.171875, "time": 1.368915319442749, "tflops": 5.126510471700781, "tokens_per_sec": 84.73862360393186, "iter": 1280, "memory": 16130, "step": 1280} +{"base_lr": 1.9927242955562996e-06, "lr": 1.9927242955562996e-06, "data_time": 0.008594512939453125, "loss": 1.265625, "time": 1.2644636631011963, "tflops": 2.725014304751526, "tokens_per_sec": 45.07840095630581, "iter": 1290, "memory": 16131, "step": 1290} 
+{"base_lr": 1.992454687859951e-06, "lr": 1.992454687859951e-06, "data_time": 0.0079498291015625, "loss": 2.453125, "time": 1.0303597450256348, "tflops": 7.6344119256295855, "tokens_per_sec": 126.16952537935136, "iter": 1300, "memory": 16131, "step": 1300} +{"base_lr": 1.992180194257784e-06, "lr": 1.992180194257784e-06, "data_time": 0.009192705154418945, "loss": 2.53125, "time": 1.0323293209075928, "tflops": 4.744663778733934, "tokens_per_sec": 78.46333370509014, "iter": 1310, "memory": 16130, "step": 1310} +{"base_lr": 1.9919008161011454e-06, "lr": 1.9919008161011454e-06, "data_time": 0.008365869522094727, "loss": 1.6796875, "time": 1.2137665748596191, "tflops": 3.6863355941283884, "tokens_per_sec": 60.96724158720359, "iter": 1320, "memory": 16131, "step": 1320} +{"base_lr": 1.9916165547654275e-06, "lr": 1.9916165547654275e-06, "data_time": 0.008443593978881836, "loss": 0.10400390625, "time": 1.3014419078826904, "tflops": 5.066427238472848, "tokens_per_sec": 83.75325808990415, "iter": 1330, "memory": 16131, "step": 1330} +{"base_lr": 1.9913274116500647e-06, "lr": 1.9913274116500647e-06, "data_time": 0.008383035659790039, "loss": 2.015625, "time": 1.035897970199585, "tflops": 5.54658947827404, "tokens_per_sec": 91.70787349028666, "iter": 1340, "memory": 16131, "step": 1340} +{"base_lr": 1.9910333881785216e-06, "lr": 1.9910333881785216e-06, "data_time": 0.008636951446533203, "loss": 0.08837890625, "time": 1.0283820629119873, "tflops": 8.061716594072363, "tokens_per_sec": 133.21897078983935, "iter": 1350, "memory": 16130, "step": 1350} +{"base_lr": 1.9907344857982933e-06, "lr": 1.9907344857982933e-06, "data_time": 0.008021831512451172, "loss": 2.203125, "time": 1.0295426845550537, "tflops": 4.933906468173672, "tokens_per_sec": 81.58962349018235, "iter": 1360, "memory": 16130, "step": 1360} +{"base_lr": 1.9904307059808903e-06, "lr": 1.9904307059808903e-06, "data_time": 0.009438276290893555, "loss": 0.166015625, "time": 1.0723683834075928, "tflops": 6.14868997763028, 
"tokens_per_sec": 101.644175347222, "iter": 1370, "memory": 16130, "step": 1370} +{"base_lr": 1.9901220502218366e-06, "lr": 1.9901220502218366e-06, "data_time": 0.007943153381347656, "loss": 1.4765625, "time": 1.016953468322754, "tflops": 3.983196007727686, "tokens_per_sec": 65.8830537354243, "iter": 1380, "memory": 16130, "step": 1380} +{"base_lr": 1.9898085200406605e-06, "lr": 1.9898085200406605e-06, "data_time": 0.007818937301635742, "loss": 0.0595703125, "time": 1.3784840106964111, "tflops": 5.090925005687612, "tokens_per_sec": 84.15041386030481, "iter": 1390, "memory": 16131, "step": 1390} +{"base_lr": 1.989490116980887e-06, "lr": 1.989490116980887e-06, "data_time": 0.008350372314453125, "loss": 0.053466796875, "time": 1.0527141094207764, "tflops": 5.803164980011076, "tokens_per_sec": 95.94247773070715, "iter": 1400, "memory": 16133, "step": 1400} +{"base_lr": 1.9891668426100307e-06, "lr": 1.9891668426100307e-06, "data_time": 0.008504390716552734, "loss": 1.671875, "time": 1.0429482460021973, "tflops": 4.754398128569075, "tokens_per_sec": 78.6232685219441, "iter": 1410, "memory": 16130, "step": 1410} +{"base_lr": 1.9888386985195894e-06, "lr": 1.9888386985195894e-06, "data_time": 0.008427619934082031, "loss": 0.11083984375, "time": 1.3761348724365234, "tflops": 4.835457246754789, "tokens_per_sec": 79.93402551100165, "iter": 1420, "memory": 16131, "step": 1420} +{"base_lr": 1.988505686325032e-06, "lr": 1.988505686325032e-06, "data_time": 0.008458375930786133, "loss": 1.9765625, "time": 1.0603210926055908, "tflops": 4.6194078103257326, "tokens_per_sec": 76.39195387585606, "iter": 1430, "memory": 16130, "step": 1430} +{"base_lr": 1.988167807665796e-06, "lr": 1.988167807665796e-06, "data_time": 0.008056879043579102, "loss": 0.06640625, "time": 1.0641679763793945, "tflops": 6.765416029337902, "tokens_per_sec": 111.82445125323201, "iter": 1440, "memory": 16131, "step": 1440} +{"base_lr": 1.9878250642052748e-06, "lr": 1.9878250642052748e-06, "data_time": 
0.008192062377929688, "loss": 2.125, "time": 1.0262985229492188, "tflops": 4.3596973276449225, "tokens_per_sec": 72.1037771615203, "iter": 1450, "memory": 16131, "step": 1450} +{"base_lr": 1.9874774576308116e-06, "lr": 1.9874774576308116e-06, "data_time": 0.007958173751831055, "loss": 0.255859375, "time": 1.0145182609558105, "tflops": 6.021649770994188, "tokens_per_sec": 99.55463976049585, "iter": 1460, "memory": 16131, "step": 1460} +{"base_lr": 1.987124989653693e-06, "lr": 1.987124989653693e-06, "data_time": 0.008524656295776367, "loss": 1.9921875, "time": 1.021106481552124, "tflops": 3.7892180475310218, "tokens_per_sec": 62.677106801491135, "iter": 1470, "memory": 16131, "step": 1470} +{"base_lr": 1.9867676620091357e-06, "lr": 1.9867676620091357e-06, "data_time": 0.008939743041992188, "loss": 1.015625, "time": 1.042874813079834, "tflops": 3.88419103066211, "tokens_per_sec": 64.24548676371838, "iter": 1480, "memory": 16131, "step": 1480} +{"base_lr": 1.986405476456283e-06, "lr": 1.986405476456283e-06, "data_time": 0.00806427001953125, "loss": 1.5703125, "time": 1.018507480621338, "tflops": 4.690185531001814, "tokens_per_sec": 77.56447694594121, "iter": 1490, "memory": 16131, "step": 1490} +{"base_lr": 1.986038434778193e-06, "lr": 1.986038434778193e-06, "data_time": 0.008204936981201172, "loss": 2.359375, "time": 1.0337293148040771, "tflops": 6.319915313814909, "tokens_per_sec": 104.47609297059044, "iter": 1500, "memory": 16130, "step": 1500} +{"base_lr": 1.9856665387818316e-06, "lr": 1.9856665387818316e-06, "data_time": 0.008387565612792969, "loss": 0.1669921875, "time": 1.023531436920166, "tflops": 7.2708420241216185, "tokens_per_sec": 120.17217602029906, "iter": 1510, "memory": 16131, "step": 1510} +{"base_lr": 1.985289790298061e-06, "lr": 1.985289790298061e-06, "data_time": 0.008054494857788086, "loss": 1.7890625, "time": 1.0553884506225586, "tflops": 6.534636449619352, "tokens_per_sec": 108.01710018017063, "iter": 1520, "memory": 16131, "step": 1520} 
+{"base_lr": 1.984908191181634e-06, "lr": 1.984908191181634e-06, "data_time": 0.008415937423706055, "loss": 1.7265625, "time": 1.4113469123840332, "tflops": 2.7843639508299196, "tokens_per_sec": 46.05529613563017, "iter": 1530, "memory": 16130, "step": 1530} +{"base_lr": 1.9845217433111825e-06, "lr": 1.9845217433111825e-06, "data_time": 0.008168458938598633, "loss": 0.08251953125, "time": 1.0408711433410645, "tflops": 6.218353126218738, "tokens_per_sec": 102.79850746600655, "iter": 1540, "memory": 16131, "step": 1540} +{"base_lr": 1.9841304485892094e-06, "lr": 1.9841304485892094e-06, "data_time": 0.007993936538696289, "loss": 1.90625, "time": 1.0413732528686523, "tflops": 4.470951189424084, "tokens_per_sec": 73.94082744857862, "iter": 1550, "memory": 16130, "step": 1550} +{"base_lr": 1.9837343089420786e-06, "lr": 1.9837343089420786e-06, "data_time": 0.008038520812988281, "loss": 0.01202392578125, "time": 1.399256944656372, "tflops": 5.88162225025455, "tokens_per_sec": 97.19444346463581, "iter": 1560, "memory": 16130, "step": 1560} +{"base_lr": 1.9833333263200066e-06, "lr": 1.9833333263200066e-06, "data_time": 0.008396148681640625, "loss": 2.1875, "time": 1.2094852924346924, "tflops": 5.351453522148669, "tokens_per_sec": 88.46738415852967, "iter": 1570, "memory": 16130, "step": 1570} +{"base_lr": 1.982927502697052e-06, "lr": 1.982927502697052e-06, "data_time": 0.008539676666259766, "loss": 2.734375, "time": 1.0442323684692383, "tflops": 7.416899838799375, "tokens_per_sec": 122.57808114827473, "iter": 1580, "memory": 16131, "step": 1580} +{"base_lr": 1.9825168400711044e-06, "lr": 1.9825168400711044e-06, "data_time": 0.008669137954711914, "loss": 0.048828125, "time": 1.0526227951049805, "tflops": 6.148930422450459, "tokens_per_sec": 101.6508482406815, "iter": 1590, "memory": 16131, "step": 1590} +{"base_lr": 1.9821013404638783e-06, "lr": 1.9821013404638783e-06, "data_time": 0.007944107055664062, "loss": 0.08642578125, "time": 1.3017616271972656, "tflops": 
5.8564865555058025, "tokens_per_sec": 96.79191440846603, "iter": 1600, "memory": 16131, "step": 1600} +{"base_lr": 1.9816810059208993e-06, "lr": 1.9816810059208993e-06, "data_time": 0.00821828842163086, "loss": 2.0625, "time": 1.0435476303100586, "tflops": 7.8283713994012105, "tokens_per_sec": 129.36639984488247, "iter": 1610, "memory": 16131, "step": 1610} +{"base_lr": 1.981255838511497e-06, "lr": 1.981255838511497e-06, "data_time": 0.007716178894042969, "loss": 0.1572265625, "time": 1.0105760097503662, "tflops": 6.045140192139556, "tokens_per_sec": 99.9430018379807, "iter": 1620, "memory": 16130, "step": 1620} +{"base_lr": 1.980825840328791e-06, "lr": 1.980825840328791e-06, "data_time": 0.008075237274169922, "loss": 0.07568359375, "time": 1.0496459007263184, "tflops": 6.628109985795451, "tokens_per_sec": 109.56075750909372, "iter": 1630, "memory": 16131, "step": 1630} +{"base_lr": 1.980391013489685e-06, "lr": 1.980391013489685e-06, "data_time": 0.008698463439941406, "loss": 0.068359375, "time": 1.0035240650177002, "tflops": 6.208332222964581, "tokens_per_sec": 102.63829597158752, "iter": 1640, "memory": 16130, "step": 1640} +{"base_lr": 1.9799513601348543e-06, "lr": 1.9799513601348543e-06, "data_time": 0.008211135864257812, "loss": 0.0498046875, "time": 1.2908720970153809, "tflops": 5.530326390256934, "tokens_per_sec": 91.41107029328143, "iter": 1650, "memory": 16130, "step": 1650} +{"base_lr": 1.9795068824287355e-06, "lr": 1.9795068824287355e-06, "data_time": 0.008387088775634766, "loss": 1.9140625, "time": 1.349269151687622, "tflops": 4.797044622336176, "tokens_per_sec": 79.30219101658743, "iter": 1660, "memory": 16131, "step": 1660} +{"base_lr": 1.9790575825595147e-06, "lr": 1.9790575825595147e-06, "data_time": 0.008468151092529297, "loss": 1.8046875, "time": 1.0332763195037842, "tflops": 4.037403034370695, "tokens_per_sec": 66.7778779959551, "iter": 1670, "memory": 16131, "step": 1670} +{"base_lr": 1.978603462739118e-06, "lr": 1.978603462739118e-06, 
"data_time": 0.008030176162719727, "loss": 2.828125, "time": 1.0140016078948975, "tflops": 5.9649880703354565, "tokens_per_sec": 98.61917300851707, "iter": 1680, "memory": 16130, "step": 1680} +{"base_lr": 1.978144525203202e-06, "lr": 1.978144525203202e-06, "data_time": 0.008376359939575195, "loss": 0.041015625, "time": 1.2197661399841309, "tflops": 5.604347080383238, "tokens_per_sec": 92.6407089816229, "iter": 1690, "memory": 16131, "step": 1690} +{"base_lr": 1.9776807722111397e-06, "lr": 1.9776807722111397e-06, "data_time": 0.008318185806274414, "loss": 0.061279296875, "time": 1.0115535259246826, "tflops": 7.416847730925907, "tokens_per_sec": 122.5837257465208, "iter": 1700, "memory": 16131, "step": 1700} +{"base_lr": 1.9772122060460107e-06, "lr": 1.9772122060460107e-06, "data_time": 0.008399248123168945, "loss": 0.1455078125, "time": 1.034928798675537, "tflops": 6.546737695478533, "tokens_per_sec": 108.22000522473155, "iter": 1710, "memory": 16130, "step": 1710} +{"base_lr": 1.97673882901459e-06, "lr": 1.97673882901459e-06, "data_time": 0.008899688720703125, "loss": 0.007568359375, "time": 1.006026268005371, "tflops": 8.120343059460017, "tokens_per_sec": 134.19132709877218, "iter": 1720, "memory": 16131, "step": 1720} +{"base_lr": 1.9762606434473385e-06, "lr": 1.9762606434473385e-06, "data_time": 0.008356332778930664, "loss": 0.2197265625, "time": 0.9961123466491699, "tflops": 7.957741995766927, "tokens_per_sec": 131.51127023025106, "iter": 1730, "memory": 16131, "step": 1730} +{"base_lr": 1.9757776516983885e-06, "lr": 1.9757776516983885e-06, "data_time": 0.009429216384887695, "loss": 1.8671875, "time": 1.336113691329956, "tflops": 3.6658972723802057, "tokens_per_sec": 60.62358355097213, "iter": 1740, "memory": 16131, "step": 1740} +{"base_lr": 1.9752898561455326e-06, "lr": 1.9752898561455326e-06, "data_time": 0.008620262145996094, "loss": 1.9296875, "time": 1.0479419231414795, "tflops": 4.269655435154932, "tokens_per_sec": 70.61460026152504, "iter": 1750, 
"memory": 16131, "step": 1750} +{"base_lr": 1.974797259190213e-06, "lr": 1.974797259190213e-06, "data_time": 0.008453130722045898, "loss": 0.03857421875, "time": 1.1057560443878174, "tflops": 4.9771234274752985, "tokens_per_sec": 82.2966335673962, "iter": 1760, "memory": 16131, "step": 1760} +{"base_lr": 1.9742998632575115e-06, "lr": 1.9742998632575115e-06, "data_time": 0.008532524108886719, "loss": 0.01092529296875, "time": 1.3153891563415527, "tflops": 5.519382394687554, "tokens_per_sec": 91.2277552398719, "iter": 1770, "memory": 16131, "step": 1770} +{"base_lr": 1.9737976707961333e-06, "lr": 1.9737976707961333e-06, "data_time": 0.008144617080688477, "loss": 2.3125, "time": 1.0543324947357178, "tflops": 3.8993753764581034, "tokens_per_sec": 64.49578319881014, "iter": 1780, "memory": 16131, "step": 1780} +{"base_lr": 1.973290684278398e-06, "lr": 1.973290684278398e-06, "data_time": 0.0085906982421875, "loss": 0.021240234375, "time": 1.0316658020019531, "tflops": 8.564995211554669, "tokens_per_sec": 141.5186969622766, "iter": 1790, "memory": 16131, "step": 1790} +{"base_lr": 1.9727789062002262e-06, "lr": 1.9727789062002262e-06, "data_time": 0.008134126663208008, "loss": 0.004638671875, "time": 1.043532133102417, "tflops": 6.899202101137891, "tokens_per_sec": 114.03577927792162, "iter": 1800, "memory": 16130, "step": 1800} +{"base_lr": 1.972262339081129e-06, "lr": 1.972262339081129e-06, "data_time": 0.008461713790893555, "loss": 0.06005859375, "time": 1.411794900894165, "tflops": 4.498779775654983, "tokens_per_sec": 74.37340929155043, "iter": 1810, "memory": 16131, "step": 1810} +{"base_lr": 1.9717409854641914e-06, "lr": 1.9717409854641914e-06, "data_time": 0.008640527725219727, "loss": 0.03857421875, "time": 1.2722463607788086, "tflops": 5.373167506005642, "tokens_per_sec": 88.81927548272802, "iter": 1820, "memory": 16131, "step": 1820} +{"base_lr": 1.9712148479160645e-06, "lr": 1.9712148479160645e-06, "data_time": 0.008591890335083008, "loss": 0.004364013671875, 
"time": 1.034998893737793, "tflops": 7.248837190897952, "tokens_per_sec": 119.80689134078861, "iter": 1830, "memory": 16130, "step": 1830} +{"base_lr": 1.970683929026952e-06, "lr": 1.970683929026952e-06, "data_time": 0.008215188980102539, "loss": 0.042236328125, "time": 1.2417731285095215, "tflops": 6.041794834513067, "tokens_per_sec": 99.85720994682431, "iter": 1840, "memory": 16130, "step": 1840} +{"base_lr": 1.9701482314105926e-06, "lr": 1.9701482314105926e-06, "data_time": 0.008553028106689453, "loss": 2.078125, "time": 1.1174554824829102, "tflops": 4.166545385058808, "tokens_per_sec": 68.90654814171417, "iter": 1850, "memory": 16131, "step": 1850} +{"base_lr": 1.969607757704257e-06, "lr": 1.969607757704257e-06, "data_time": 0.008233070373535156, "loss": 0.051513671875, "time": 1.0497658252716064, "tflops": 6.338786404779376, "tokens_per_sec": 104.78527434576645, "iter": 1860, "memory": 16131, "step": 1860} +{"base_lr": 1.9690625105687217e-06, "lr": 1.9690625105687217e-06, "data_time": 0.00906682014465332, "loss": 0.059326171875, "time": 1.0116541385650635, "tflops": 6.158439482298577, "tokens_per_sec": 101.8134519233955, "iter": 1870, "memory": 16131, "step": 1870} +{"base_lr": 1.9685124926882688e-06, "lr": 1.9685124926882688e-06, "data_time": 0.008533000946044922, "loss": 0.408203125, "time": 1.0284817218780518, "tflops": 5.822118967493109, "tokens_per_sec": 96.25839516051435, "iter": 1880, "memory": 16131, "step": 1880} +{"base_lr": 1.9679577067706638e-06, "lr": 1.9679577067706638e-06, "data_time": 0.00857853889465332, "loss": 1.8203125, "time": 1.050865888595581, "tflops": 4.488163774803628, "tokens_per_sec": 74.22450461701452, "iter": 1890, "memory": 16131, "step": 1890} +{"base_lr": 1.967398155547147e-06, "lr": 1.967398155547147e-06, "data_time": 0.00832056999206543, "loss": 2.140625, "time": 1.347440242767334, "tflops": 4.174277387634056, "tokens_per_sec": 69.01975838938152, "iter": 1900, "memory": 16131, "step": 1900} +{"base_lr": 1.966833841772419e-06, 
"lr": 1.966833841772419e-06, "data_time": 0.008473634719848633, "loss": 0.125, "time": 1.3213961124420166, "tflops": 4.531527589862061, "tokens_per_sec": 74.9207592392317, "iter": 1910, "memory": 16131, "step": 1910} +{"base_lr": 1.966264768224624e-06, "lr": 1.966264768224624e-06, "data_time": 0.008721590042114258, "loss": 0.005767822265625, "time": 1.3417577743530273, "tflops": 4.823899255070914, "tokens_per_sec": 79.74613752583906, "iter": 1920, "memory": 16130, "step": 1920} +{"base_lr": 1.9656909377053414e-06, "lr": 1.9656909377053414e-06, "data_time": 0.00816488265991211, "loss": 0.06396484375, "time": 1.0200226306915283, "tflops": 5.573547179685769, "tokens_per_sec": 92.15481811043757, "iter": 1930, "memory": 16131, "step": 1930} +{"base_lr": 1.965112353039568e-06, "lr": 1.965112353039568e-06, "data_time": 0.008690118789672852, "loss": 2.546875, "time": 1.029493808746338, "tflops": 4.346165940828995, "tokens_per_sec": 71.8799854561936, "iter": 1940, "memory": 16131, "step": 1940} +{"base_lr": 1.964529017075708e-06, "lr": 1.964529017075708e-06, "data_time": 0.008861780166625977, "loss": 0.0294189453125, "time": 1.3012828826904297, "tflops": 6.510810504351376, "tokens_per_sec": 107.5861381580917, "iter": 1950, "memory": 16131, "step": 1950} +{"base_lr": 1.963940932685552e-06, "lr": 1.963940932685552e-06, "data_time": 0.008187532424926758, "loss": 0.0038299560546875, "time": 1.0201799869537354, "tflops": 7.651177343452756, "tokens_per_sec": 126.44827545095102, "iter": 1960, "memory": 16131, "step": 1960} +{"base_lr": 1.9633481027642703e-06, "lr": 1.9633481027642703e-06, "data_time": 0.008329153060913086, "loss": 0.04248046875, "time": 1.0294301509857178, "tflops": 7.641305937639564, "tokens_per_sec": 126.28345874209518, "iter": 1970, "memory": 16131, "step": 1970} +{"base_lr": 1.9627505302303955e-06, "lr": 1.9627505302303955e-06, "data_time": 0.008417129516601562, "loss": 1.7734375, "time": 1.015392780303955, "tflops": 5.241162754552657, "tokens_per_sec": 
86.6659697674537, "iter": 1980, "memory": 16131, "step": 1980} +{"base_lr": 1.962148218025809e-06, "lr": 1.962148218025809e-06, "data_time": 0.008054018020629883, "loss": 0.0201416015625, "time": 1.0272703170776367, "tflops": 7.244378875600594, "tokens_per_sec": 119.73479419690507, "iter": 1990, "memory": 16131, "step": 1990} +{"base_lr": 1.961541169115725e-06, "lr": 1.961541169115725e-06, "data_time": 0.008653402328491211, "loss": 0.016845703125, "time": 0.9910385608673096, "tflops": 6.714412137111812, "tokens_per_sec": 110.99467199704345, "iter": 2000, "memory": 16131, "step": 2000} +{"base_lr": 1.960929386488676e-06, "lr": 1.960929386488676e-06, "data_time": 0.008402109146118164, "loss": 0.049072265625, "time": 1.030564546585083, "tflops": 6.868420413762745, "tokens_per_sec": 113.53000681770202, "iter": 2010, "memory": 16131, "step": 2010} +{"base_lr": 1.9603128731564996e-06, "lr": 1.9603128731564996e-06, "data_time": 0.008638620376586914, "loss": 2.34375, "time": 1.0739595890045166, "tflops": 6.816582391576742, "tokens_per_sec": 112.66718155759067, "iter": 2020, "memory": 16131, "step": 2020} +{"base_lr": 1.9596916321543232e-06, "lr": 1.9596916321543232e-06, "data_time": 0.008850574493408203, "loss": 1.71875, "time": 1.4093282222747803, "tflops": 3.9480058188909366, "tokens_per_sec": 65.27932850974813, "iter": 2030, "memory": 16131, "step": 2030} +{"base_lr": 1.9590656665405487e-06, "lr": 1.9590656665405487e-06, "data_time": 0.00847172737121582, "loss": 0.0157470703125, "time": 1.2417936325073242, "tflops": 5.261004369767701, "tokens_per_sec": 86.97097260987609, "iter": 2040, "memory": 16131, "step": 2040} +{"base_lr": 1.9584349793968363e-06, "lr": 1.9584349793968363e-06, "data_time": 0.008553266525268555, "loss": 0.146484375, "time": 1.0497627258300781, "tflops": 6.107978857759302, "tokens_per_sec": 100.97519886323046, "iter": 2050, "memory": 16130, "step": 2050} +{"base_lr": 1.9577995738280926e-06, "lr": 1.9577995738280926e-06, "data_time": 
0.008636713027954102, "loss": 0.11376953125, "time": 1.039635181427002, "tflops": 7.041637445959195, "tokens_per_sec": 116.38698089631707, "iter": 2060, "memory": 16131, "step": 2060} +{"base_lr": 1.9571594529624513e-06, "lr": 1.9571594529624513e-06, "data_time": 0.008453845977783203, "loss": 0.0040283203125, "time": 1.0348422527313232, "tflops": 8.655903132023527, "tokens_per_sec": 143.01696670118696, "iter": 2070, "memory": 16131, "step": 2070} +{"base_lr": 1.9565146199512604e-06, "lr": 1.9565146199512604e-06, "data_time": 0.008038759231567383, "loss": 2.796875, "time": 1.0240073204040527, "tflops": 5.847558724778872, "tokens_per_sec": 96.67899635799469, "iter": 2080, "memory": 16131, "step": 2080} +{"base_lr": 1.9558650779690663e-06, "lr": 1.9558650779690663e-06, "data_time": 0.008734464645385742, "loss": 0.00089263916015625, "time": 1.0192437171936035, "tflops": 6.053156192842181, "tokens_per_sec": 100.0742003892335, "iter": 2090, "memory": 16130, "step": 2090} +{"base_lr": 1.9552108302135985e-06, "lr": 1.9552108302135985e-06, "data_time": 0.008261680603027344, "loss": 2.46875, "time": 1.0047180652618408, "tflops": 6.1406693400481265, "tokens_per_sec": 101.52101721522855, "iter": 2100, "memory": 16131, "step": 2100} +{"base_lr": 1.954551879905752e-06, "lr": 1.954551879905752e-06, "data_time": 0.00841069221496582, "loss": 2.375, "time": 1.0048935413360596, "tflops": 3.6095144301692534, "tokens_per_sec": 59.70781732776099, "iter": 2110, "memory": 16131, "step": 2110} +{"base_lr": 1.953888230289574e-06, "lr": 1.953888230289574e-06, "data_time": 0.008430957794189453, "loss": 2.015625, "time": 1.0021629333496094, "tflops": 7.9096969062083495, "tokens_per_sec": 130.71726726313602, "iter": 2120, "memory": 16131, "step": 2120} +{"base_lr": 1.9532198846322444e-06, "lr": 1.9532198846322444e-06, "data_time": 0.008635997772216797, "loss": 2.875, "time": 1.0552515983581543, "tflops": 5.846607035085642, "tokens_per_sec": 96.65941293868038, "iter": 2130, "memory": 16130, 
"step": 2130} +{"base_lr": 1.952546846224065e-06, "lr": 1.952546846224065e-06, "data_time": 0.008285284042358398, "loss": 2.0625, "time": 0.9988217353820801, "tflops": 4.722022004824602, "tokens_per_sec": 78.09201305584776, "iter": 2140, "memory": 16131, "step": 2140} +{"base_lr": 1.9518691183784373e-06, "lr": 1.9518691183784373e-06, "data_time": 0.008579730987548828, "loss": 0.0068359375, "time": 1.2018277645111084, "tflops": 5.536767861359354, "tokens_per_sec": 91.52725810478789, "iter": 2150, "memory": 16131, "step": 2150} +{"base_lr": 1.951186704431853e-06, "lr": 1.951186704431853e-06, "data_time": 0.008121490478515625, "loss": 0.208984375, "time": 0.9995090961456299, "tflops": 7.627493834274802, "tokens_per_sec": 126.06188426474867, "iter": 2160, "memory": 16131, "step": 2160} +{"base_lr": 1.9504996077438687e-06, "lr": 1.9504996077438687e-06, "data_time": 0.008794546127319336, "loss": 1.5078125, "time": 1.233311653137207, "tflops": 4.315080302561307, "tokens_per_sec": 71.35260562574005, "iter": 2170, "memory": 16130, "step": 2170} +{"base_lr": 1.9498078316970976e-06, "lr": 1.9498078316970976e-06, "data_time": 0.008413553237915039, "loss": 0.01287841796875, "time": 1.0094234943389893, "tflops": 7.432498367176929, "tokens_per_sec": 122.8423953824032, "iter": 2180, "memory": 16130, "step": 2180} +{"base_lr": 1.9491113796971907e-06, "lr": 1.9491113796971907e-06, "data_time": 0.007963180541992188, "loss": 2.453125, "time": 1.2353253364562988, "tflops": 5.8771042230064054, "tokens_per_sec": 97.14040217465255, "iter": 2190, "memory": 16131, "step": 2190} +{"base_lr": 1.948410255172815e-06, "lr": 1.948410255172815e-06, "data_time": 0.008362770080566406, "loss": 0.04443359375, "time": 1.0008857250213623, "tflops": 6.043154920872571, "tokens_per_sec": 99.91150587922087, "iter": 2200, "memory": 16130, "step": 2200} +{"base_lr": 1.9477044615756444e-06, "lr": 1.9477044615756444e-06, "data_time": 0.008219003677368164, "loss": 2.09375, "time": 1.0406105518341064, "tflops": 
4.474228110930948, "tokens_per_sec": 73.9950213499289, "iter": 2210, "memory": 16131, "step": 2210} +{"base_lr": 1.946994002380337e-06, "lr": 1.946994002380337e-06, "data_time": 0.008624076843261719, "loss": 2.09375, "time": 1.3004331588745117, "tflops": 5.256704474751399, "tokens_per_sec": 86.89412387616402, "iter": 2220, "memory": 16130, "step": 2220} +{"base_lr": 1.946278881084519e-06, "lr": 1.946278881084519e-06, "data_time": 0.008133649826049805, "loss": 0.00823974609375, "time": 1.0414581298828125, "tflops": 8.135138627901632, "tokens_per_sec": 134.42691163745462, "iter": 2230, "memory": 16131, "step": 2230} +{"base_lr": 1.945559101208772e-06, "lr": 1.945559101208772e-06, "data_time": 0.008620023727416992, "loss": 0.009033203125, "time": 1.3635048866271973, "tflops": 4.613684796677506, "tokens_per_sec": 76.27402073870152, "iter": 2240, "memory": 16131, "step": 2240} +{"base_lr": 1.944834666296607e-06, "lr": 1.944834666296607e-06, "data_time": 0.008150100708007812, "loss": 0.0286865234375, "time": 1.0568130016326904, "tflops": 5.666038297613151, "tokens_per_sec": 93.67787853381756, "iter": 2250, "memory": 16130, "step": 2250} +{"base_lr": 1.944105579914456e-06, "lr": 1.944105579914456e-06, "data_time": 0.008428812026977539, "loss": 1.6796875, "time": 1.0209300518035889, "tflops": 4.797640668900118, "tokens_per_sec": 79.33942179175249, "iter": 2260, "memory": 16131, "step": 2260} +{"base_lr": 1.9433718456516484e-06, "lr": 1.9433718456516484e-06, "data_time": 0.00836324691772461, "loss": 0.2236328125, "time": 1.0054993629455566, "tflops": 7.22042799749231, "tokens_per_sec": 119.3436857566444, "iter": 2270, "memory": 16131, "step": 2270} +{"base_lr": 1.9426334671203958e-06, "lr": 1.9426334671203958e-06, "data_time": 0.008779764175415039, "loss": 2.109375, "time": 1.017517328262329, "tflops": 6.242010991950691, "tokens_per_sec": 103.19234580428339, "iter": 2280, "memory": 16131, "step": 2280} +{"base_lr": 1.9418904479557744e-06, "lr": 1.9418904479557744e-06, 
"data_time": 0.008271932601928711, "loss": 0.0004825592041015625, "time": 1.0330626964569092, "tflops": 8.377325270043384, "tokens_per_sec": 138.42335077077902, "iter": 2290, "memory": 16131, "step": 2290} +{"base_lr": 1.941142791815707e-06, "lr": 1.941142791815707e-06, "data_time": 0.014037847518920898, "loss": 0.1435546875, "time": 1.0566141605377197, "tflops": 6.641742068289111, "tokens_per_sec": 109.784635046777, "iter": 2300, "memory": 16131, "step": 2300} +{"base_lr": 1.9403905023809418e-06, "lr": 1.9403905023809418e-06, "data_time": 0.008298873901367188, "loss": 0.0283203125, "time": 1.0288848876953125, "tflops": 6.938525494878435, "tokens_per_sec": 114.6872710553691, "iter": 2310, "memory": 16131, "step": 2310} +{"base_lr": 1.939633583355039e-06, "lr": 1.939633583355039e-06, "data_time": 0.008645296096801758, "loss": 0.06298828125, "time": 1.221311092376709, "tflops": 4.50621004586599, "tokens_per_sec": 74.51009048221832, "iter": 2320, "memory": 16131, "step": 2320} +{"base_lr": 1.9388720384643504e-06, "lr": 1.9388720384643504e-06, "data_time": 0.008362531661987305, "loss": 0.00112152099609375, "time": 1.0186176300048828, "tflops": 6.11633905191688, "tokens_per_sec": 101.1174330444341, "iter": 2330, "memory": 16131, "step": 2330} +{"base_lr": 1.938105871458002e-06, "lr": 1.938105871458002e-06, "data_time": 0.008234500885009766, "loss": 0.0157470703125, "time": 1.001795768737793, "tflops": 7.549585857725861, "tokens_per_sec": 124.77593128324776, "iter": 2340, "memory": 16131, "step": 2340} +{"base_lr": 1.9373350861078727e-06, "lr": 1.9373350861078727e-06, "data_time": 0.009712457656860352, "loss": 0.00653076171875, "time": 1.0402562618255615, "tflops": 6.454970571702782, "tokens_per_sec": 106.7044766499148, "iter": 2350, "memory": 16130, "step": 2350} +{"base_lr": 1.9365596862085775e-06, "lr": 1.9365596862085775e-06, "data_time": 0.008053064346313477, "loss": 1.78125, "time": 0.9974122047424316, "tflops": 4.0612346388061145, "tokens_per_sec": 
67.17383212413637, "iter": 2360, "memory": 16130, "step": 2360} +{"base_lr": 1.935779675577452e-06, "lr": 1.935779675577452e-06, "data_time": 0.009476423263549805, "loss": 0.0081787109375, "time": 1.0226733684539795, "tflops": 5.855186147771094, "tokens_per_sec": 96.80510224840005, "iter": 2370, "memory": 16131, "step": 2370} +{"base_lr": 1.9349950580545288e-06, "lr": 1.9349950580545288e-06, "data_time": 0.008824348449707031, "loss": 0.275390625, "time": 1.0103471279144287, "tflops": 6.466175383162818, "tokens_per_sec": 106.8939545785893, "iter": 2380, "memory": 16131, "step": 2380} +{"base_lr": 1.93420583750252e-06, "lr": 1.93420583750252e-06, "data_time": 0.009033918380737305, "loss": 0.00653076171875, "time": 1.0302603244781494, "tflops": 6.458796076422789, "tokens_per_sec": 106.76913143831949, "iter": 2390, "memory": 16130, "step": 2390} +{"base_lr": 1.933412017806799e-06, "lr": 1.933412017806799e-06, "data_time": 0.008414983749389648, "loss": 0.010009765625, "time": 1.2150492668151855, "tflops": 5.875435852310991, "tokens_per_sec": 97.11540364877337, "iter": 2400, "memory": 16131, "step": 2400} +{"base_lr": 1.932613602875382e-06, "lr": 1.932613602875382e-06, "data_time": 0.008327722549438477, "loss": 2.046875, "time": 1.2604632377624512, "tflops": 4.65449405769422, "tokens_per_sec": 76.95583424718953, "iter": 2410, "memory": 16131, "step": 2410} +{"base_lr": 1.931810596638906e-06, "lr": 1.931810596638906e-06, "data_time": 0.008376359939575195, "loss": 2.25, "time": 1.0331013202667236, "tflops": 6.968860598732243, "tokens_per_sec": 115.18715315276305, "iter": 2420, "memory": 16131, "step": 2420} +{"base_lr": 1.931003003050614e-06, "lr": 1.931003003050614e-06, "data_time": 0.008258581161499023, "loss": 1.7734375, "time": 1.060689926147461, "tflops": 5.359855049397604, "tokens_per_sec": 88.62156383565309, "iter": 2430, "memory": 16130, "step": 2430} +{"base_lr": 1.9301908260863293e-06, "lr": 1.9301908260863293e-06, "data_time": 0.008459091186523438, "loss": 
0.0216064453125, "time": 1.0250370502471924, "tflops": 6.550810583377585, "tokens_per_sec": 108.28876865779978, "iter": 2440, "memory": 16131, "step": 2440} +{"base_lr": 1.9293740697444424e-06, "lr": 1.9293740697444424e-06, "data_time": 0.008559942245483398, "loss": 2.046875, "time": 1.3771519660949707, "tflops": 3.1171485998378516, "tokens_per_sec": 51.55567558842098, "iter": 2450, "memory": 16130, "step": 2450} +{"base_lr": 1.9285527380458867e-06, "lr": 1.9285527380458867e-06, "data_time": 0.008121728897094727, "loss": 0.0022735595703125, "time": 1.2124311923980713, "tflops": 5.388414425439525, "tokens_per_sec": 89.07721995035232, "iter": 2460, "memory": 16130, "step": 2460} +{"base_lr": 1.92772683503412e-06, "lr": 1.92772683503412e-06, "data_time": 0.008621931076049805, "loss": 2.953125, "time": 1.0254237651824951, "tflops": 3.065288883556872, "tokens_per_sec": 50.710742003034056, "iter": 2470, "memory": 16131, "step": 2470} +{"base_lr": 1.9268963647751064e-06, "lr": 1.9268963647751064e-06, "data_time": 0.008876323699951172, "loss": 0.00115203857421875, "time": 1.015864372253418, "tflops": 7.743347379791712, "tokens_per_sec": 127.96983883931522, "iter": 2480, "memory": 16131, "step": 2480} +{"base_lr": 1.9260613313572934e-06, "lr": 1.9260613313572934e-06, "data_time": 0.008402824401855469, "loss": 1.703125, "time": 1.027975082397461, "tflops": 4.646989144606869, "tokens_per_sec": 76.85011179033445, "iter": 2490, "memory": 16131, "step": 2490} +{"base_lr": 1.925221738891594e-06, "lr": 1.925221738891594e-06, "data_time": 0.00833892822265625, "loss": 1.8984375, "time": 1.0083560943603516, "tflops": 4.677373637894455, "tokens_per_sec": 77.3536258035925, "iter": 2500, "memory": 16131, "step": 2500} +{"base_lr": 1.9243775915113664e-06, "lr": 1.9243775915113664e-06, "data_time": 0.008102178573608398, "loss": 0.05810546875, "time": 1.0337862968444824, "tflops": 6.729793669532332, "tokens_per_sec": 111.24155964430312, "iter": 2510, "memory": 16130, "step": 2510} 
+{"base_lr": 1.9235288933723904e-06, "lr": 1.9235288933723904e-06, "data_time": 0.00839996337890625, "loss": 1.875, "time": 1.0076220035552979, "tflops": 4.500571965275297, "tokens_per_sec": 74.43267389486854, "iter": 2520, "memory": 16131, "step": 2520} +{"base_lr": 1.9226756486528515e-06, "lr": 1.9226756486528515e-06, "data_time": 0.00860142707824707, "loss": 2.421875, "time": 1.0062429904937744, "tflops": 7.0344346607488335, "tokens_per_sec": 116.2741018871302, "iter": 2530, "memory": 16130, "step": 2530} +{"base_lr": 1.9218178615533173e-06, "lr": 1.9218178615533173e-06, "data_time": 0.008332490921020508, "loss": 2.109375, "time": 1.016385793685913, "tflops": 6.308557808330728, "tokens_per_sec": 104.29110743026793, "iter": 2540, "memory": 16130, "step": 2540} +{"base_lr": 1.920955536296719e-06, "lr": 1.920955536296719e-06, "data_time": 0.008226871490478516, "loss": 0.03076171875, "time": 1.0156888961791992, "tflops": 6.551456224898448, "tokens_per_sec": 108.30087875695774, "iter": 2550, "memory": 16131, "step": 2550} +{"base_lr": 1.9200886771283267e-06, "lr": 1.9200886771283267e-06, "data_time": 0.008177042007446289, "loss": 0.00122833251953125, "time": 1.3182179927825928, "tflops": 4.955994958907008, "tokens_per_sec": 81.92878612735639, "iter": 2560, "memory": 16131, "step": 2560} +{"base_lr": 1.9192172883157347e-06, "lr": 1.9192172883157347e-06, "data_time": 0.008335113525390625, "loss": 1.6328125, "time": 1.0303852558135986, "tflops": 4.4598865651731225, "tokens_per_sec": 73.75881940383178, "iter": 2570, "memory": 16130, "step": 2570} +{"base_lr": 1.918341374148835e-06, "lr": 1.918341374148835e-06, "data_time": 0.008873701095581055, "loss": 2.234375, "time": 1.0407707691192627, "tflops": 7.092180257475193, "tokens_per_sec": 117.22081712875499, "iter": 2580, "memory": 16130, "step": 2580} +{"base_lr": 1.9174609389397977e-06, "lr": 1.9174609389397977e-06, "data_time": 0.007714509963989258, "loss": 2.25, "time": 1.0307807922363281, "tflops": 6.102928782731531, 
"tokens_per_sec": 100.89439072129598, "iter": 2590, "memory": 16130, "step": 2590} +{"base_lr": 1.9165759870230514e-06, "lr": 1.9165759870230514e-06, "data_time": 0.008625507354736328, "loss": 1.8203125, "time": 1.0442044734954834, "tflops": 4.16901768705807, "tokens_per_sec": 68.95201258706585, "iter": 2600, "memory": 16131, "step": 2600} +{"base_lr": 1.9156865227552605e-06, "lr": 1.9156865227552605e-06, "data_time": 0.00863790512084961, "loss": 2.28125, "time": 1.2559030055999756, "tflops": 6.118591435550292, "tokens_per_sec": 101.12245884723228, "iter": 2610, "memory": 16131, "step": 2610} +{"base_lr": 1.9147925505153032e-06, "lr": 1.9147925505153032e-06, "data_time": 0.008528947830200195, "loss": 0.0120849609375, "time": 1.0055243968963623, "tflops": 5.3528146405595365, "tokens_per_sec": 88.51102994081262, "iter": 2620, "memory": 16131, "step": 2620} +{"base_lr": 1.9138940747042515e-06, "lr": 1.9138940747042515e-06, "data_time": 0.00832986831665039, "loss": 0.1650390625, "time": 1.0259222984313965, "tflops": 7.37204287260004, "tokens_per_sec": 121.84158604506334, "iter": 2630, "memory": 16131, "step": 2630} +{"base_lr": 1.9129910997453476e-06, "lr": 1.9129910997453476e-06, "data_time": 0.008359670639038086, "loss": 0.0128173828125, "time": 1.0274548530578613, "tflops": 6.2995510789817635, "tokens_per_sec": 104.14082884658886, "iter": 2640, "memory": 16131, "step": 2640} +{"base_lr": 1.9120836300839846e-06, "lr": 1.9120836300839846e-06, "data_time": 0.008369207382202148, "loss": 1.7421875, "time": 1.0133287906646729, "tflops": 4.176600301368867, "tokens_per_sec": 69.07925704352662, "iter": 2650, "memory": 16131, "step": 2650} +{"base_lr": 1.9111716701876813e-06, "lr": 1.9111716701876813e-06, "data_time": 0.008493185043334961, "loss": 0.027099609375, "time": 1.0382623672485352, "tflops": 6.759138095884927, "tokens_per_sec": 111.72513196957725, "iter": 2660, "memory": 16131, "step": 2660} +{"base_lr": 1.9102552245460656e-06, "lr": 1.9102552245460656e-06, 
"data_time": 0.00812530517578125, "loss": 2.140625, "time": 1.2975053787231445, "tflops": 5.548754712979616, "tokens_per_sec": 91.71445602561924, "iter": 2670, "memory": 16130, "step": 2670} +{"base_lr": 1.9093342976708457e-06, "lr": 1.9093342976708457e-06, "data_time": 0.007243633270263672, "loss": 0.1142578125, "time": 1.0025126934051514, "tflops": 7.121051006666681, "tokens_per_sec": 117.70424531891113, "iter": 2680, "memory": 16131, "step": 2680} +{"base_lr": 1.9084088940957915e-06, "lr": 1.9084088940957915e-06, "data_time": 0.008310794830322266, "loss": 0.00083160400390625, "time": 1.0242853164672852, "tflops": 7.679687093672752, "tokens_per_sec": 126.91776198475375, "iter": 2690, "memory": 16131, "step": 2690} +{"base_lr": 1.907479018376715e-06, "lr": 1.907479018376715e-06, "data_time": 0.008184432983398438, "loss": 0.0108642578125, "time": 1.0436317920684814, "tflops": 6.434092568035671, "tokens_per_sec": 106.3593509161802, "iter": 2700, "memory": 16131, "step": 2700} +{"base_lr": 1.9065446750914408e-06, "lr": 1.9065446750914408e-06, "data_time": 0.008382797241210938, "loss": 0.005767822265625, "time": 1.2310781478881836, "tflops": 6.045055220365987, "tokens_per_sec": 99.9124224655411, "iter": 2710, "memory": 16131, "step": 2710} +{"base_lr": 1.9056058688397899e-06, "lr": 1.9056058688397899e-06, "data_time": 0.00830221176147461, "loss": 1.9921875, "time": 1.0236475467681885, "tflops": 3.8980344746527402, "tokens_per_sec": 64.47531692749872, "iter": 2720, "memory": 16131, "step": 2720} +{"base_lr": 1.9046626042435536e-06, "lr": 1.9046626042435536e-06, "data_time": 0.008572816848754883, "loss": 0.00124359130859375, "time": 1.0770492553710938, "tflops": 6.2344628378209075, "tokens_per_sec": 103.05935354985436, "iter": 2730, "memory": 16130, "step": 2730} +{"base_lr": 1.9037148859464727e-06, "lr": 1.9037148859464727e-06, "data_time": 0.008700370788574219, "loss": 0.08203125, "time": 1.0122675895690918, "tflops": 6.513752686868415, "tokens_per_sec": 
107.67903775946446, "iter": 2740, "memory": 16131, "step": 2740} +{"base_lr": 1.9027627186142135e-06, "lr": 1.9027627186142135e-06, "data_time": 0.008783578872680664, "loss": 0.039306640625, "time": 1.1433308124542236, "tflops": 6.402988482361663, "tokens_per_sec": 105.83113713183403, "iter": 2750, "memory": 16131, "step": 2750} +{"base_lr": 1.901806106934345e-06, "lr": 1.901806106934345e-06, "data_time": 0.008371114730834961, "loss": 0.01611328125, "time": 1.1033267974853516, "tflops": 6.525300664965549, "tokens_per_sec": 107.85562380167971, "iter": 2760, "memory": 16130, "step": 2760} +{"base_lr": 1.900845055616315e-06, "lr": 1.900845055616315e-06, "data_time": 0.00811767578125, "loss": 2.109375, "time": 0.9974806308746338, "tflops": 5.760212884571165, "tokens_per_sec": 95.23994457577055, "iter": 2770, "memory": 16133, "step": 2770} +{"base_lr": 1.899879569391431e-06, "lr": 1.899879569391431e-06, "data_time": 0.008313894271850586, "loss": 0.052001953125, "time": 1.0079996585845947, "tflops": 6.481234067224875, "tokens_per_sec": 107.1428934326659, "iter": 2780, "memory": 16131, "step": 2780} +{"base_lr": 1.8989096530128297e-06, "lr": 1.8989096530128297e-06, "data_time": 0.008834600448608398, "loss": 1.9296875, "time": 1.0034949779510498, "tflops": 7.899197532225983, "tokens_per_sec": 130.5437524633627, "iter": 2790, "memory": 16131, "step": 2790} +{"base_lr": 1.8979353112554606e-06, "lr": 1.8979353112554606e-06, "data_time": 0.008817434310913086, "loss": 0.1357421875, "time": 1.0246613025665283, "tflops": 7.321969176226951, "tokens_per_sec": 121.01559772901447, "iter": 2800, "memory": 16130, "step": 2800} +{"base_lr": 1.8969565489160584e-06, "lr": 1.8969565489160584e-06, "data_time": 0.008960723876953125, "loss": 2.53125, "time": 1.0197510719299316, "tflops": 5.040649548038081, "tokens_per_sec": 83.35367555833969, "iter": 2810, "memory": 16131, "step": 2810} +{"base_lr": 1.8959733708131212e-06, "lr": 1.8959733708131212e-06, "data_time": 0.008727550506591797, 
"loss": 0.00107574462890625, "time": 1.0417559146881104, "tflops": 6.910965403503897, "tokens_per_sec": 114.23021297221334, "iter": 2820, "memory": 16130, "step": 2820} +{"base_lr": 1.8949857817868858e-06, "lr": 1.8949857817868858e-06, "data_time": 0.00870060920715332, "loss": 0.03857421875, "time": 1.2218432426452637, "tflops": 5.743583526149363, "tokens_per_sec": 94.93852889734661, "iter": 2830, "memory": 16131, "step": 2830} +{"base_lr": 1.8939937866993037e-06, "lr": 1.8939937866993037e-06, "data_time": 0.009013891220092773, "loss": 2.34375, "time": 1.0129992961883545, "tflops": 9.86046841783084, "tokens_per_sec": 162.88264031444837, "iter": 2840, "memory": 16131, "step": 2840} +{"base_lr": 1.892997390434018e-06, "lr": 1.892997390434018e-06, "data_time": 0.008423089981079102, "loss": 2.09375, "time": 1.2716872692108154, "tflops": 4.946799356992062, "tokens_per_sec": 81.78111279234446, "iter": 2850, "memory": 16131, "step": 2850} +{"base_lr": 1.8919965978963395e-06, "lr": 1.8919965978963395e-06, "data_time": 0.00863194465637207, "loss": 0.07373046875, "time": 1.0067288875579834, "tflops": 6.489415181893479, "tokens_per_sec": 107.27813747539092, "iter": 2860, "memory": 16131, "step": 2860} +{"base_lr": 1.8909914140132209e-06, "lr": 1.8909914140132209e-06, "data_time": 0.008263587951660156, "loss": 0.1982421875, "time": 1.0197596549987793, "tflops": 6.822360976971093, "tokens_per_sec": 112.77167069335066, "iter": 2870, "memory": 16130, "step": 2870} +{"base_lr": 1.8899818437332347e-06, "lr": 1.8899818437332347e-06, "data_time": 0.008397579193115234, "loss": 2.53125, "time": 1.331874132156372, "tflops": 3.3139979326923883, "tokens_per_sec": 54.80998409493431, "iter": 2880, "memory": 16131, "step": 2880} +{"base_lr": 1.888967892026548e-06, "lr": 1.888967892026548e-06, "data_time": 0.008614301681518555, "loss": 0.058349609375, "time": 1.004462480545044, "tflops": 6.745306579045905, "tokens_per_sec": 111.5024226082738, "iter": 2890, "memory": 16130, "step": 2890} 
+{"base_lr": 1.887949563884898e-06, "lr": 1.887949563884898e-06, "data_time": 0.00799417495727539, "loss": 1.625, "time": 1.007993459701538, "tflops": 3.8385121164315996, "tokens_per_sec": 63.49247545602785, "iter": 2900, "memory": 16130, "step": 2900} +{"base_lr": 1.8869268643215662e-06, "lr": 1.8869268643215662e-06, "data_time": 0.008663177490234375, "loss": 1.6484375, "time": 1.2312133312225342, "tflops": 2.9951589856936653, "tokens_per_sec": 49.54462273356028, "iter": 2910, "memory": 16131, "step": 2910} +{"base_lr": 1.885899798371356e-06, "lr": 1.885899798371356e-06, "data_time": 0.00827646255493164, "loss": 0.005126953125, "time": 1.0147085189819336, "tflops": 7.334062191608708, "tokens_per_sec": 121.21707633171918, "iter": 2920, "memory": 16130, "step": 2920} +{"base_lr": 1.8848683710905655e-06, "lr": 1.8848683710905655e-06, "data_time": 0.008245229721069336, "loss": 0.000904083251953125, "time": 1.2806987762451172, "tflops": 5.952804523251072, "tokens_per_sec": 98.38379042519367, "iter": 2930, "memory": 16131, "step": 2930} +{"base_lr": 1.8838325875569657e-06, "lr": 1.8838325875569657e-06, "data_time": 0.008290767669677734, "loss": 0.263671875, "time": 1.005242109298706, "tflops": 6.137468139938975, "tokens_per_sec": 101.46809316519528, "iter": 2940, "memory": 16131, "step": 2940} +{"base_lr": 1.882792452869772e-06, "lr": 1.882792452869772e-06, "data_time": 0.008353948593139648, "loss": 0.004730224609375, "time": 1.0201773643493652, "tflops": 7.235343734579261, "tokens_per_sec": 119.58704854982534, "iter": 2950, "memory": 16131, "step": 2950} +{"base_lr": 1.881747972149621e-06, "lr": 1.881747972149621e-06, "data_time": 0.008565902709960938, "loss": 0.00092315673828125, "time": 1.041926383972168, "tflops": 6.677216915872976, "tokens_per_sec": 110.37248098226631, "iter": 2960, "memory": 16131, "step": 2960} +{"base_lr": 1.8806991505385454e-06, "lr": 1.8806991505385454e-06, "data_time": 0.00763249397277832, "loss": 1.609375, "time": 0.9828426837921143, 
"tflops": 7.695171661461969, "tokens_per_sec": 127.18210356675165, "iter": 2970, "memory": 16131, "step": 2970} +{"base_lr": 1.8796459931999485e-06, "lr": 1.8796459931999485e-06, "data_time": 0.008499860763549805, "loss": 2.65625, "time": 1.0330471992492676, "tflops": 5.268814643232371, "tokens_per_sec": 87.12089831453719, "iter": 2980, "memory": 16131, "step": 2980} +{"base_lr": 1.8785885053185789e-06, "lr": 1.8785885053185789e-06, "data_time": 0.008675336837768555, "loss": 1.2578125, "time": 1.0012948513031006, "tflops": 3.803763049830437, "tokens_per_sec": 62.91852985955925, "iter": 2990, "memory": 16131, "step": 2990} +{"base_lr": 1.877526692100503e-06, "lr": 1.877526692100503e-06, "data_time": 0.008327245712280273, "loss": 1.4921875, "time": 0.9908673763275146, "tflops": 6.165379746779132, "tokens_per_sec": 101.93089651839966, "iter": 3000, "memory": 16131, "step": 3000} +{"base_lr": 1.8764605587730838e-06, "lr": 1.8764605587730838e-06, "data_time": 0.008341312408447266, "loss": 0.0096435546875, "time": 0.9925103187561035, "tflops": 7.864480454174759, "tokens_per_sec": 129.97345978381722, "iter": 3010, "memory": 16131, "step": 3010} +{"base_lr": 1.8753901105849497e-06, "lr": 1.8753901105849497e-06, "data_time": 0.008634090423583984, "loss": 0.166015625, "time": 1.3163948059082031, "tflops": 5.192965495373965, "tokens_per_sec": 85.8405088600707, "iter": 3020, "memory": 16130, "step": 3020} +{"base_lr": 1.874315352805973e-06, "lr": 1.874315352805973e-06, "data_time": 0.008328914642333984, "loss": 0.036376953125, "time": 1.198322057723999, "tflops": 6.66550443255299, "tokens_per_sec": 110.15402674853578, "iter": 3030, "memory": 16131, "step": 3030} +{"base_lr": 1.873236290727242e-06, "lr": 1.873236290727242e-06, "data_time": 0.008420228958129883, "loss": 0.0004596710205078125, "time": 1.0051333904266357, "tflops": 8.18786924988988, "tokens_per_sec": 135.30542442942678, "iter": 3040, "memory": 16131, "step": 3040} +{"base_lr": 1.8721529296610352e-06, "lr": 
1.8721529296610352e-06, "data_time": 0.008686304092407227, "loss": 1.1953125, "time": 1.2687666416168213, "tflops": 4.719499034934189, "tokens_per_sec": 78.02853318539631, "iter": 3050, "memory": 16131, "step": 3050} +{"base_lr": 1.8710652749407948e-06, "lr": 1.8710652749407948e-06, "data_time": 0.008723020553588867, "loss": 2.0625, "time": 1.007596492767334, "tflops": 5.942798514731178, "tokens_per_sec": 98.25361710817508, "iter": 3060, "memory": 16131, "step": 3060} +{"base_lr": 1.8699733319211011e-06, "lr": 1.8699733319211011e-06, "data_time": 0.007995128631591797, "loss": 0.10498046875, "time": 1.0178377628326416, "tflops": 6.121025390167493, "tokens_per_sec": 101.19490920954817, "iter": 3070, "memory": 16131, "step": 3070} +{"base_lr": 1.868877105977647e-06, "lr": 1.868877105977647e-06, "data_time": 0.0085296630859375, "loss": 0.006134033203125, "time": 0.9983758926391602, "tflops": 6.9077988448962895, "tokens_per_sec": 114.18544942880395, "iter": 3080, "memory": 16130, "step": 3080} +{"base_lr": 1.867776602507209e-06, "lr": 1.867776602507209e-06, "data_time": 0.008931875228881836, "loss": 0.006072998046875, "time": 1.0217390060424805, "tflops": 6.749844918476679, "tokens_per_sec": 111.5744816687058, "iter": 3090, "memory": 16130, "step": 3090} +{"base_lr": 1.866671826927623e-06, "lr": 1.866671826927623e-06, "data_time": 0.008536577224731445, "loss": 1.25, "time": 1.0126936435699463, "tflops": 5.972692267596186, "tokens_per_sec": 98.74654653443008, "iter": 3100, "memory": 16131, "step": 3100} +{"base_lr": 1.8655627846777565e-06, "lr": 1.8655627846777565e-06, "data_time": 0.008403301239013672, "loss": 2.65625, "time": 1.1206505298614502, "tflops": 5.343274090457947, "tokens_per_sec": 88.34154570216583, "iter": 3110, "memory": 16131, "step": 3110} +{"base_lr": 1.8644494812174807e-06, "lr": 1.8644494812174807e-06, "data_time": 0.0074977874755859375, "loss": 1.6015625, "time": 1.0988802909851074, "tflops": 4.95316392077216, "tokens_per_sec": 81.90155082246153, 
"iter": 3120, "memory": 16131, "step": 3120} +{"base_lr": 1.863331922027648e-06, "lr": 1.863331922027648e-06, "data_time": 0.008594989776611328, "loss": 0.0028228759765625, "time": 1.0892095565795898, "tflops": 7.221925916455164, "tokens_per_sec": 119.35260686484932, "iter": 3130, "memory": 16131, "step": 3130} +{"base_lr": 1.862210112610059e-06, "lr": 1.862210112610059e-06, "data_time": 0.008387565612792969, "loss": 0.04150390625, "time": 1.0166444778442383, "tflops": 6.128209934883857, "tokens_per_sec": 101.31368658816388, "iter": 3140, "memory": 16130, "step": 3140} +{"base_lr": 1.8610840584874396e-06, "lr": 1.8610840584874396e-06, "data_time": 0.008687973022460938, "loss": 0.01190185546875, "time": 1.229442834854126, "tflops": 5.905224338905935, "tokens_per_sec": 97.60518878792803, "iter": 3150, "memory": 16132, "step": 3150} +{"base_lr": 1.8599537652034127e-06, "lr": 1.8599537652034127e-06, "data_time": 0.008265256881713867, "loss": 1.3125, "time": 1.0176992416381836, "tflops": 4.51548077402347, "tokens_per_sec": 74.6782515800922, "iter": 3160, "memory": 16131, "step": 3160} +{"base_lr": 1.8588192383224714e-06, "lr": 1.8588192383224714e-06, "data_time": 0.008431196212768555, "loss": 0.37890625, "time": 0.9914085865020752, "tflops": 6.100923047009779, "tokens_per_sec": 100.86658655310104, "iter": 3170, "memory": 16131, "step": 3170} +{"base_lr": 1.8576804834299493e-06, "lr": 1.8576804834299493e-06, "data_time": 0.008219003677368164, "loss": 2.03125, "time": 1.2132413387298584, "tflops": 3.189138866962338, "tokens_per_sec": 52.751252332820115, "iter": 3180, "memory": 16131, "step": 3180} +{"base_lr": 1.8565375061319957e-06, "lr": 1.8565375061319957e-06, "data_time": 0.008380651473999023, "loss": 0.04443359375, "time": 1.0168750286102295, "tflops": 6.603391142656531, "tokens_per_sec": 109.15795636322719, "iter": 3190, "memory": 16131, "step": 3190} +{"base_lr": 1.855390312055548e-06, "lr": 1.855390312055548e-06, "data_time": 0.0091094970703125, "loss": 
0.00640869140625, "time": 1.4615683555603027, "tflops": 4.884440742995117, "tokens_per_sec": 80.73519076340642, "iter": 3200, "memory": 16131, "step": 3200} +{"base_lr": 1.8542389068483006e-06, "lr": 1.8542389068483006e-06, "data_time": 0.008509397506713867, "loss": 1.671875, "time": 1.0110056400299072, "tflops": 4.8447361149145145, "tokens_per_sec": 80.11824740910818, "iter": 3210, "memory": 16131, "step": 3210} +{"base_lr": 1.8530832961786823e-06, "lr": 1.8530832961786823e-06, "data_time": 0.008330821990966797, "loss": 2.09375, "time": 1.0362465381622314, "tflops": 4.55148272122194, "tokens_per_sec": 75.2716628016501, "iter": 3220, "memory": 16131, "step": 3220} +{"base_lr": 1.851923485735823e-06, "lr": 1.851923485735823e-06, "data_time": 0.008130073547363281, "loss": 2.890625, "time": 1.0028879642486572, "tflops": 7.360078258426983, "tokens_per_sec": 121.64868295261495, "iter": 3230, "memory": 16131, "step": 3230} +{"base_lr": 1.850759481229531e-06, "lr": 1.850759481229531e-06, "data_time": 0.008363485336303711, "loss": 0.059326171875, "time": 1.0209097862243652, "tflops": 6.992727585628297, "tokens_per_sec": 115.58318040645322, "iter": 3240, "memory": 16131, "step": 3240} +{"base_lr": 1.84959128839026e-06, "lr": 1.84959128839026e-06, "data_time": 0.00859689712524414, "loss": 2.640625, "time": 1.0034501552581787, "tflops": 4.458966800116076, "tokens_per_sec": 73.74556634642875, "iter": 3250, "memory": 16131, "step": 3250} +{"base_lr": 1.8484189129690838e-06, "lr": 1.8484189129690838e-06, "data_time": 0.008162736892700195, "loss": 0.010986328125, "time": 0.9975576400756836, "tflops": 6.852729637608217, "tokens_per_sec": 113.27666238044505, "iter": 3260, "memory": 16130, "step": 3260} +{"base_lr": 1.8472423607376672e-06, "lr": 1.8472423607376672e-06, "data_time": 0.008424043655395508, "loss": 0.000789642333984375, "time": 1.0169463157653809, "tflops": 7.556305829340649, "tokens_per_sec": 124.88368169591286, "iter": 3270, "memory": 16131, "step": 3270} +{"base_lr": 
1.8460616374882385e-06, "lr": 1.8460616374882385e-06, "data_time": 0.008785724639892578, "loss": 0.01397705078125, "time": 1.0128226280212402, "tflops": 7.048562924108799, "tokens_per_sec": 116.50608579946623, "iter": 3280, "memory": 16131, "step": 3280} +{"base_lr": 1.8448767490335583e-06, "lr": 1.8448767490335583e-06, "data_time": 0.008636713027954102, "loss": 0.0030975341796875, "time": 1.0191948413848877, "tflops": 6.528919762183697, "tokens_per_sec": 107.92833277140949, "iter": 3290, "memory": 16131, "step": 3290} +{"base_lr": 1.843687701206895e-06, "lr": 1.843687701206895e-06, "data_time": 0.008315086364746094, "loss": 2.484375, "time": 1.0143027305603027, "tflops": 4.530601388420754, "tokens_per_sec": 74.92832042159891, "iter": 3300, "memory": 16131, "step": 3300} +{"base_lr": 1.8424944998619918e-06, "lr": 1.8424944998619918e-06, "data_time": 0.007941484451293945, "loss": 0.06396484375, "time": 1.2411119937896729, "tflops": 5.11747076758266, "tokens_per_sec": 84.6015512905513, "iter": 3310, "memory": 16130, "step": 3310} +{"base_lr": 1.8412971508730406e-06, "lr": 1.8412971508730406e-06, "data_time": 0.008179187774658203, "loss": 1.9375, "time": 1.0113885402679443, "tflops": 6.818922267122079, "tokens_per_sec": 112.71632558708404, "iter": 3320, "memory": 16132, "step": 3320} +{"base_lr": 1.8400956601346525e-06, "lr": 1.8400956601346525e-06, "data_time": 0.013824224472045898, "loss": 0.02685546875, "time": 1.069453239440918, "tflops": 5.032838757831746, "tokens_per_sec": 83.22009482756219, "iter": 3330, "memory": 16131, "step": 3330} +{"base_lr": 1.8388900335618296e-06, "lr": 1.8388900335618296e-06, "data_time": 0.013128280639648438, "loss": 1.40625, "time": 1.042494535446167, "tflops": 4.756467319864717, "tokens_per_sec": 78.65748664555538, "iter": 3340, "memory": 16131, "step": 3340} +{"base_lr": 1.8376802770899332e-06, "lr": 1.8376802770899332e-06, "data_time": 0.008023500442504883, "loss": 0.02734375, "time": 1.2347779273986816, "tflops": 
8.040294879954104, "tokens_per_sec": 132.8174049445211, "iter": 3350, "memory": 16131, "step": 3350} +{"base_lr": 1.8364663966746569e-06, "lr": 1.8364663966746569e-06, "data_time": 0.008185625076293945, "loss": 0.0026092529296875, "time": 0.9930317401885986, "tflops": 7.55518496522533, "tokens_per_sec": 124.87012749091464, "iter": 3360, "memory": 16130, "step": 3360} +{"base_lr": 1.8352483982919973e-06, "lr": 1.8352483982919973e-06, "data_time": 0.008586883544921875, "loss": 1.4296875, "time": 1.0328128337860107, "tflops": 5.445894118058487, "tokens_per_sec": 90.04535667803164, "iter": 3370, "memory": 16130, "step": 3370} +{"base_lr": 1.8340262879382225e-06, "lr": 1.8340262879382225e-06, "data_time": 0.00861358642578125, "loss": 0.000843048095703125, "time": 1.2011394500732422, "tflops": 6.498477759555344, "tokens_per_sec": 107.3980210974059, "iter": 3380, "memory": 16130, "step": 3380} +{"base_lr": 1.8328000716298459e-06, "lr": 1.8328000716298459e-06, "data_time": 0.008660554885864258, "loss": 2.03125, "time": 1.298144817352295, "tflops": 4.006294047626734, "tokens_per_sec": 66.24838681352973, "iter": 3390, "memory": 16131, "step": 3390} +{"base_lr": 1.8315697554035939e-06, "lr": 1.8315697554035939e-06, "data_time": 0.008432626724243164, "loss": 0.212890625, "time": 1.0252318382263184, "tflops": 6.254125453347775, "tokens_per_sec": 103.39124873772917, "iter": 3400, "memory": 16131, "step": 3400} +{"base_lr": 1.8303353453163766e-06, "lr": 1.8303353453163766e-06, "data_time": 0.008512258529663086, "loss": 0.00958251953125, "time": 1.0167131423950195, "tflops": 7.259996545256517, "tokens_per_sec": 119.99451459090102, "iter": 3410, "memory": 16131, "step": 3410} +{"base_lr": 1.8290968474452586e-06, "lr": 1.8290968474452586e-06, "data_time": 0.00856161117553711, "loss": 0.04541015625, "time": 1.0247278213500977, "tflops": 7.084940606089481, "tokens_per_sec": 117.10426661567624, "iter": 3420, "memory": 16131, "step": 3420} +{"base_lr": 1.8278542678874304e-06, "lr": 
1.8278542678874304e-06, "data_time": 0.008669614791870117, "loss": 1.9296875, "time": 1.0872721672058105, "tflops": 4.393554063932335, "tokens_per_sec": 72.65890030363792, "iter": 3430, "memory": 16131, "step": 3430} +{"base_lr": 1.8266076127601745e-06, "lr": 1.8266076127601745e-06, "data_time": 0.007964611053466797, "loss": 1.6171875, "time": 1.017859935760498, "tflops": 5.347429463846558, "tokens_per_sec": 88.42081001318489, "iter": 3440, "memory": 16130, "step": 3440} +{"base_lr": 1.825356888200838e-06, "lr": 1.825356888200838e-06, "data_time": 0.008712291717529297, "loss": 0.03125, "time": 1.0141584873199463, "tflops": 6.38214283971542, "tokens_per_sec": 105.50619191942745, "iter": 3450, "memory": 16130, "step": 3450} +{"base_lr": 1.8241021003668036e-06, "lr": 1.8241021003668036e-06, "data_time": 0.00841379165649414, "loss": 2.65625, "time": 1.0410552024841309, "tflops": 4.879344820972211, "tokens_per_sec": 80.68736393563121, "iter": 3460, "memory": 16130, "step": 3460} +{"base_lr": 1.8228432554354567e-06, "lr": 1.8228432554354567e-06, "data_time": 0.008056640625, "loss": 0.06494140625, "time": 1.2178220748901367, "tflops": 5.762548458178777, "tokens_per_sec": 95.25200962576527, "iter": 3470, "memory": 16130, "step": 3470} +{"base_lr": 1.8215803596041563e-06, "lr": 1.8215803596041563e-06, "data_time": 0.008077144622802734, "loss": 0.00103759765625, "time": 1.0097570419311523, "tflops": 7.730154560245664, "tokens_per_sec": 127.75350370734802, "iter": 3480, "memory": 16131, "step": 3480} +{"base_lr": 1.820313419090203e-06, "lr": 1.820313419090203e-06, "data_time": 0.00813150405883789, "loss": 0.000782012939453125, "time": 1.1858720779418945, "tflops": 5.713438662106916, "tokens_per_sec": 94.44526275910287, "iter": 3490, "memory": 16131, "step": 3490} +{"base_lr": 1.8190424401308116e-06, "lr": 1.8190424401308116e-06, "data_time": 0.008548974990844727, "loss": 0.0218505859375, "time": 1.0555801391601562, "tflops": 5.959549192192201, "tokens_per_sec": 
98.52402119146184, "iter": 3500, "memory": 16130, "step": 3500} +{"base_lr": 1.8177674289830764e-06, "lr": 1.8177674289830764e-06, "data_time": 0.008218526840209961, "loss": 1.671875, "time": 1.0319406986236572, "tflops": 5.27446414102496, "tokens_per_sec": 87.2143138844602, "iter": 3510, "memory": 16131, "step": 3510} +{"base_lr": 1.816488391923944e-06, "lr": 1.816488391923944e-06, "data_time": 0.008553504943847656, "loss": 0.017822265625, "time": 1.2586088180541992, "tflops": 4.950077180456208, "tokens_per_sec": 81.8363883380028, "iter": 3520, "memory": 16131, "step": 3520} +{"base_lr": 1.81520533525018e-06, "lr": 1.81520533525018e-06, "data_time": 0.00824117660522461, "loss": 0.138671875, "time": 1.0250351428985596, "tflops": 6.2553255656213995, "tokens_per_sec": 103.41108861902372, "iter": 3530, "memory": 16131, "step": 3530} +{"base_lr": 1.813918265278339e-06, "lr": 1.813918265278339e-06, "data_time": 0.008561849594116211, "loss": 2.109375, "time": 1.213369369506836, "tflops": 3.4381558102164194, "tokens_per_sec": 56.86644292659837, "iter": 3540, "memory": 16131, "step": 3540} +{"base_lr": 1.8126271883447326e-06, "lr": 1.8126271883447326e-06, "data_time": 0.008634567260742188, "loss": 1.78125, "time": 1.053377389907837, "tflops": 4.592391612220437, "tokens_per_sec": 75.94619057365897, "iter": 3550, "memory": 16131, "step": 3550} +{"base_lr": 1.8113321108053993e-06, "lr": 1.8113321108053993e-06, "data_time": 0.00825810432434082, "loss": 1.7578125, "time": 1.0181474685668945, "tflops": 4.156833330276824, "tokens_per_sec": 68.75231944392158, "iter": 3560, "memory": 16131, "step": 3560} +{"base_lr": 1.810033039036073e-06, "lr": 1.810033039036073e-06, "data_time": 0.008419275283813477, "loss": 0.177734375, "time": 0.9802560806274414, "tflops": 5.984985725720556, "tokens_per_sec": 98.95373455660012, "iter": 3570, "memory": 16130, "step": 3570} +{"base_lr": 1.8087299794321523e-06, "lr": 1.8087299794321523e-06, "data_time": 0.008353233337402344, "loss": 1.8984375, 
"time": 1.0852975845336914, "tflops": 6.801207343031782, "tokens_per_sec": 112.41156502924132, "iter": 3580, "memory": 16131, "step": 3580} +{"base_lr": 1.807422938408666e-06, "lr": 1.807422938408666e-06, "data_time": 0.008428096771240234, "loss": 0.037841796875, "time": 1.0642881393432617, "tflops": 5.8538759937836105, "tokens_per_sec": 96.77830297297237, "iter": 3590, "memory": 16130, "step": 3590} +{"base_lr": 1.8061119224002447e-06, "lr": 1.8061119224002447e-06, "data_time": 0.008830070495605469, "loss": 2.015625, "time": 1.0089993476867676, "tflops": 3.2350861866385454, "tokens_per_sec": 53.51836958442729, "iter": 3600, "memory": 16131, "step": 3600} +{"base_lr": 1.804796937861089e-06, "lr": 1.804796937861089e-06, "data_time": 0.008980989456176758, "loss": 1.5, "time": 1.038203477859497, "tflops": 4.36800245568947, "tokens_per_sec": 72.24017410783294, "iter": 3610, "memory": 16131, "step": 3610} +{"base_lr": 1.8034779912649355e-06, "lr": 1.8034779912649355e-06, "data_time": 0.00857090950012207, "loss": 1.265625, "time": 1.0040900707244873, "tflops": 4.27530103925669, "tokens_per_sec": 70.71078787653006, "iter": 3620, "memory": 16130, "step": 3620} +{"base_lr": 1.802155089105026e-06, "lr": 1.802155089105026e-06, "data_time": 0.008715152740478516, "loss": 2.46875, "time": 0.999962568283081, "tflops": 5.382587192626703, "tokens_per_sec": 89.00333154742232, "iter": 3630, "memory": 16131, "step": 3630} +{"base_lr": 1.8008282378940768e-06, "lr": 1.8008282378940768e-06, "data_time": 0.008325576782226562, "loss": 0.046630859375, "time": 1.2189998626708984, "tflops": 6.303821361479754, "tokens_per_sec": 104.18376891497883, "iter": 3640, "memory": 16131, "step": 3640} +{"base_lr": 1.7994974441642447e-06, "lr": 1.7994974441642447e-06, "data_time": 0.00888967514038086, "loss": 0.0162353515625, "time": 0.9924471378326416, "tflops": 6.643840744792655, "tokens_per_sec": 109.82952728135236, "iter": 3650, "memory": 16130, "step": 3650} +{"base_lr": 1.7981627144670964e-06, 
"lr": 1.7981627144670964e-06, "data_time": 0.008179664611816406, "loss": 0.0135498046875, "time": 1.0029585361480713, "tflops": 6.030665552360382, "tokens_per_sec": 99.70501909675839, "iter": 3660, "memory": 16131, "step": 3660} +{"base_lr": 1.7968240553735758e-06, "lr": 1.7968240553735758e-06, "data_time": 0.008284330368041992, "loss": 0.0019989013671875, "time": 1.0049140453338623, "tflops": 7.646780746696979, "tokens_per_sec": 126.3789680217679, "iter": 3670, "memory": 16131, "step": 3670} +{"base_lr": 1.7954814734739709e-06, "lr": 1.7954814734739709e-06, "data_time": 0.008599042892456055, "loss": 0.154296875, "time": 1.0106298923492432, "tflops": 6.404425969570364, "tokens_per_sec": 105.87456477382538, "iter": 3680, "memory": 16130, "step": 3680} +{"base_lr": 1.7941349753778807e-06, "lr": 1.7941349753778807e-06, "data_time": 0.008736133575439453, "loss": 2.046875, "time": 1.0165653228759766, "tflops": 5.235117411161232, "tokens_per_sec": 86.56600615782527, "iter": 3690, "memory": 16131, "step": 3690} +{"base_lr": 1.7927845677141867e-06, "lr": 1.7927845677141867e-06, "data_time": 0.008363485336303711, "loss": 0.056396484375, "time": 1.0058190822601318, "tflops": 7.94121043064313, "tokens_per_sec": 131.23632502900756, "iter": 3700, "memory": 16131, "step": 3700} +{"base_lr": 1.7914302571310143e-06, "lr": 1.7914302571310143e-06, "data_time": 0.012256383895874023, "loss": 1.4609375, "time": 1.068713665008545, "tflops": 4.866364139428825, "tokens_per_sec": 80.4705720678063, "iter": 3710, "memory": 16130, "step": 3710} +{"base_lr": 1.7900720502957052e-06, "lr": 1.7900720502957052e-06, "data_time": 0.008368492126464844, "loss": 1.2265625, "time": 1.028611421585083, "tflops": 4.408735160462617, "tokens_per_sec": 72.91383162395047, "iter": 3720, "memory": 16131, "step": 3720} +{"base_lr": 1.7887099538947824e-06, "lr": 1.7887099538947824e-06, "data_time": 0.008503437042236328, "loss": 1.53125, "time": 1.2025041580200195, "tflops": 4.274586283592834, "tokens_per_sec": 
70.68582626764956, "iter": 3730, "memory": 16131, "step": 3730} +{"base_lr": 1.7873439746339172e-06, "lr": 1.7873439746339172e-06, "data_time": 0.00839376449584961, "loss": 1.90625, "time": 1.0198919773101807, "tflops": 4.209060781316668, "tokens_per_sec": 69.61521570861134, "iter": 3740, "memory": 16131, "step": 3740} +{"base_lr": 1.7859741192378953e-06, "lr": 1.7859741192378953e-06, "data_time": 0.008376598358154297, "loss": 1.8671875, "time": 1.2108867168426514, "tflops": 3.2453105730677523, "tokens_per_sec": 53.67967052230266, "iter": 3750, "memory": 16131, "step": 3750} +{"base_lr": 1.7846003944505861e-06, "lr": 1.7846003944505861e-06, "data_time": 0.008391141891479492, "loss": 2.28125, "time": 1.2429194450378418, "tflops": 4.866371282984458, "tokens_per_sec": 80.45573701429616, "iter": 3760, "memory": 16131, "step": 3760} +{"base_lr": 1.783222807034908e-06, "lr": 1.783222807034908e-06, "data_time": 0.008590221405029297, "loss": 0.059814453125, "time": 1.288163185119629, "tflops": 6.012411296050763, "tokens_per_sec": 99.36629262387555, "iter": 3770, "memory": 16131, "step": 3770} +{"base_lr": 1.7818413637727946e-06, "lr": 1.7818413637727946e-06, "data_time": 0.00818943977355957, "loss": 1.5078125, "time": 1.025787115097046, "tflops": 5.424162519081928, "tokens_per_sec": 89.68722520091951, "iter": 3780, "memory": 16131, "step": 3780} +{"base_lr": 1.7804560714651637e-06, "lr": 1.7804560714651637e-06, "data_time": 0.008545398712158203, "loss": 0.003936767578125, "time": 1.2770657539367676, "tflops": 5.068262388396494, "tokens_per_sec": 83.78581891345134, "iter": 3790, "memory": 16131, "step": 3790} +{"base_lr": 1.7790669369318802e-06, "lr": 1.7790669369318802e-06, "data_time": 0.010355710983276367, "loss": 1.546875, "time": 1.0789480209350586, "tflops": 4.764092134281436, "tokens_per_sec": 78.7804401608308, "iter": 3800, "memory": 16131, "step": 3800} +{"base_lr": 1.7776739670117251e-06, "lr": 1.7776739670117251e-06, "data_time": 0.013310670852661133, "loss": 
1.96875, "time": 1.0619721412658691, "tflops": 5.41040631746302, "tokens_per_sec": 89.4562072849394, "iter": 3810, "memory": 16130, "step": 3810} +{"base_lr": 1.7762771685623626e-06, "lr": 1.7762771685623626e-06, "data_time": 0.008238792419433594, "loss": 2.125, "time": 1.2221908569335938, "tflops": 3.413340006629794, "tokens_per_sec": 56.45599425695308, "iter": 3820, "memory": 16131, "step": 3820} +{"base_lr": 1.7748765484603033e-06, "lr": 1.7748765484603033e-06, "data_time": 0.008069753646850586, "loss": 1.359375, "time": 1.0180847644805908, "tflops": 4.929971081073727, "tokens_per_sec": 81.52562821452655, "iter": 3830, "memory": 16131, "step": 3830} +{"base_lr": 1.7734721136008723e-06, "lr": 1.7734721136008723e-06, "data_time": 0.00828242301940918, "loss": 0.045166015625, "time": 1.2725598812103271, "tflops": 4.515073016926142, "tokens_per_sec": 74.65267560499485, "iter": 3840, "memory": 16130, "step": 3840} +{"base_lr": 1.7720638708981748e-06, "lr": 1.7720638708981748e-06, "data_time": 0.00921940803527832, "loss": 2.09375, "time": 1.2450933456420898, "tflops": 6.17171186468998, "tokens_per_sec": 102.0003845048297, "iter": 3850, "memory": 16131, "step": 3850} +{"base_lr": 1.7706518272850633e-06, "lr": 1.7706518272850633e-06, "data_time": 0.009045600891113281, "loss": 0.005767822265625, "time": 1.2468154430389404, "tflops": 5.43417024247987, "tokens_per_sec": 89.82885207687646, "iter": 3860, "memory": 16131, "step": 3860} +{"base_lr": 1.769235989713101e-06, "lr": 1.769235989713101e-06, "data_time": 0.008755207061767578, "loss": 2.0625, "time": 1.2190227508544922, "tflops": 3.174013861215187, "tokens_per_sec": 52.50107100551301, "iter": 3870, "memory": 16130, "step": 3870} +{"base_lr": 1.7678163651525297e-06, "lr": 1.7678163651525297e-06, "data_time": 0.008665323257446289, "loss": 0.000949859619140625, "time": 1.036799430847168, "tflops": 7.470072470082399, "tokens_per_sec": 123.45685789515512, "iter": 3880, "memory": 16131, "step": 3880} +{"base_lr": 
1.766392960592235e-06, "lr": 1.766392960592235e-06, "data_time": 0.008764982223510742, "loss": 0.0012969970703125, "time": 1.22867751121521, "tflops": 5.859583999535822, "tokens_per_sec": 96.85210229184345, "iter": 3890, "memory": 16131, "step": 3890} +{"base_lr": 1.764965783039711e-06, "lr": 1.764965783039711e-06, "data_time": 0.010089635848999023, "loss": 1.765625, "time": 1.1814873218536377, "tflops": 6.606569413016812, "tokens_per_sec": 109.18441325083579, "iter": 3900, "memory": 16131, "step": 3900} +{"base_lr": 1.7635348395210265e-06, "lr": 1.7635348395210265e-06, "data_time": 0.008782386779785156, "loss": 0.014892578125, "time": 1.0095648765563965, "tflops": 6.651205596630366, "tokens_per_sec": 109.94835753251301, "iter": 3910, "memory": 16130, "step": 3910} +{"base_lr": 1.7621001370807906e-06, "lr": 1.7621001370807906e-06, "data_time": 0.00845026969909668, "loss": 0.047607421875, "time": 1.0184364318847656, "tflops": 6.176901737483813, "tokens_per_sec": 102.11732096762353, "iter": 3920, "memory": 16131, "step": 3920} +{"base_lr": 1.760661682782118e-06, "lr": 1.760661682782118e-06, "data_time": 0.00840306282043457, "loss": 2.546875, "time": 1.016709327697754, "tflops": 3.865119909792302, "tokens_per_sec": 63.93174354672507, "iter": 3930, "memory": 16130, "step": 3930} +{"base_lr": 1.7592194837065941e-06, "lr": 1.7592194837065941e-06, "data_time": 0.00843667984008789, "loss": 0.05615234375, "time": 1.2945873737335205, "tflops": 5.701688469291128, "tokens_per_sec": 94.23852145882151, "iter": 3940, "memory": 16131, "step": 3940} +{"base_lr": 1.7577735469542384e-06, "lr": 1.7577735469542384e-06, "data_time": 0.008453369140625, "loss": 2.609375, "time": 1.0267462730407715, "tflops": 5.890946627428299, "tokens_per_sec": 97.39504551961656, "iter": 3950, "memory": 16130, "step": 3950} +{"base_lr": 1.7563238796434729e-06, "lr": 1.7563238796434729e-06, "data_time": 0.0085906982421875, "loss": 2.203125, "time": 1.2473464012145996, "tflops": 3.9753120577889285, 
"tokens_per_sec": 65.7395571271036, "iter": 3960, "memory": 16131, "step": 3960} +{"base_lr": 1.754870488911085e-06, "lr": 1.754870488911085e-06, "data_time": 0.00869894027709961, "loss": 1.1875, "time": 1.0104587078094482, "tflops": 4.188463417422427, "tokens_per_sec": 69.2754681204957, "iter": 3970, "memory": 16130, "step": 3970} +{"base_lr": 1.7534133819121928e-06, "lr": 1.7534133819121928e-06, "data_time": 0.008410930633544922, "loss": 1.921875, "time": 1.0384047031402588, "tflops": 4.367156010639814, "tokens_per_sec": 72.2261751830658, "iter": 3980, "memory": 16131, "step": 3980} +{"base_lr": 1.7519525658202086e-06, "lr": 1.7519525658202086e-06, "data_time": 0.008136272430419922, "loss": 0.00933837890625, "time": 1.0213565826416016, "tflops": 6.633733500695404, "tokens_per_sec": 109.65807819069163, "iter": 3990, "memory": 16131, "step": 3990} +{"base_lr": 1.7504880478268064e-06, "lr": 1.7504880478268064e-06, "data_time": 0.008300065994262695, "loss": 0.1357421875, "time": 1.3025836944580078, "tflops": 5.34105294404861, "tokens_per_sec": 88.28607366205522, "iter": 4000, "memory": 16131, "step": 4000} +{"base_lr": 1.7490198351418848e-06, "lr": 1.7490198351418848e-06, "data_time": 0.008118867874145508, "loss": 0.007354736328125, "time": 1.0028412342071533, "tflops": 7.783463360075445, "tokens_per_sec": 128.6345192036891, "iter": 4010, "memory": 16131, "step": 4010} +{"base_lr": 1.7475479349935296e-06, "lr": 1.7475479349935296e-06, "data_time": 0.008174419403076172, "loss": 0.007598876953125, "time": 1.0125718116760254, "tflops": 6.451968790397083, "tokens_per_sec": 106.65910185780302, "iter": 4020, "memory": 16130, "step": 4020} +{"base_lr": 1.7460723546279822e-06, "lr": 1.7460723546279822e-06, "data_time": 0.0084075927734375, "loss": 0.000919342041015625, "time": 1.2563660144805908, "tflops": 5.441083829537292, "tokens_per_sec": 89.94194263255896, "iter": 4030, "memory": 16131, "step": 4030} +{"base_lr": 1.7445931013096008e-06, "lr": 1.7445931013096008e-06, 
"data_time": 0.008261442184448242, "loss": 0.0380859375, "time": 1.0216615200042725, "tflops": 5.268267041244954, "tokens_per_sec": 87.11300000761572, "iter": 4040, "memory": 16131, "step": 4040} +{"base_lr": 1.743110182320826e-06, "lr": 1.743110182320826e-06, "data_time": 0.00817728042602539, "loss": 2.359375, "time": 1.01456880569458, "tflops": 4.469756329331382, "tokens_per_sec": 73.92302974324211, "iter": 4050, "memory": 16131, "step": 4050} +{"base_lr": 1.741623604962145e-06, "lr": 1.741623604962145e-06, "data_time": 0.008122920989990234, "loss": 2.4375, "time": 1.0132648944854736, "tflops": 6.686708890537258, "tokens_per_sec": 110.53378105708676, "iter": 4060, "memory": 16130, "step": 4060} +{"base_lr": 1.7401333765520548e-06, "lr": 1.7401333765520548e-06, "data_time": 0.00830984115600586, "loss": 0.00823974609375, "time": 1.012542486190796, "tflops": 6.631646225902739, "tokens_per_sec": 109.62502958021493, "iter": 4070, "memory": 16130, "step": 4070} +{"base_lr": 1.7386395044270271e-06, "lr": 1.7386395044270271e-06, "data_time": 0.00837850570678711, "loss": 0.037353515625, "time": 1.2075471878051758, "tflops": 4.758158389253974, "tokens_per_sec": 78.67187382763257, "iter": 4080, "memory": 16130, "step": 4080} +{"base_lr": 1.7371419959414723e-06, "lr": 1.7371419959414723e-06, "data_time": 0.008246898651123047, "loss": 1.5625, "time": 0.9799697399139404, "tflops": 4.751094644979677, "tokens_per_sec": 78.57385474645723, "iter": 4090, "memory": 16131, "step": 4090} +{"base_lr": 1.7356408584677017e-06, "lr": 1.7356408584677017e-06, "data_time": 0.008313655853271484, "loss": 0.0712890625, "time": 1.0601894855499268, "tflops": 6.333607009534381, "tokens_per_sec": 104.69826527502195, "iter": 4100, "memory": 16131, "step": 4100} +{"base_lr": 1.7341360993958933e-06, "lr": 1.7341360993958933e-06, "data_time": 0.008582592010498047, "loss": 1.53125, "time": 1.0190861225128174, "tflops": 3.6779847445927514, "tokens_per_sec": 60.83882277492142, "iter": 4110, "memory": 
16131, "step": 4110} +{"base_lr": 1.7326277261340533e-06, "lr": 1.7326277261340533e-06, "data_time": 0.008386850357055664, "loss": 0.0093994140625, "time": 1.0051615238189697, "tflops": 5.595706961803082, "tokens_per_sec": 92.52244320551294, "iter": 4120, "memory": 16131, "step": 4120} +{"base_lr": 1.7311157461079808e-06, "lr": 1.7311157461079808e-06, "data_time": 0.008998394012451172, "loss": 0.08056640625, "time": 1.0351719856262207, "tflops": 6.545199708362195, "tokens_per_sec": 108.19458172656992, "iter": 4130, "memory": 16131, "step": 4130} +{"base_lr": 1.7296001667612328e-06, "lr": 1.7296001667612328e-06, "data_time": 0.008642911911010742, "loss": 0.00070953369140625, "time": 1.0226366519927979, "tflops": 7.692068057392204, "tokens_per_sec": 127.12237503569197, "iter": 4140, "memory": 16131, "step": 4140} +{"base_lr": 1.728080995555086e-06, "lr": 1.728080995555086e-06, "data_time": 0.008293867111206055, "loss": 0.007415771484375, "time": 1.006922721862793, "tflops": 6.307687978044408, "tokens_per_sec": 104.27811163665785, "iter": 4150, "memory": 16131, "step": 4150} +{"base_lr": 1.726558239968497e-06, "lr": 1.726558239968497e-06, "data_time": 0.008562088012695312, "loss": 0.00168609619140625, "time": 1.2811756134033203, "tflops": 5.430300423587924, "tokens_per_sec": 89.76130890785826, "iter": 4160, "memory": 16131, "step": 4160} +{"base_lr": 1.7250319074980702e-06, "lr": 1.7250319074980702e-06, "data_time": 0.008398294448852539, "loss": 2.0625, "time": 1.0332698822021484, "tflops": 7.495589505485346, "tokens_per_sec": 123.87857442150263, "iter": 4170, "memory": 16131, "step": 4170} +{"base_lr": 1.723502005658021e-06, "lr": 1.723502005658021e-06, "data_time": 0.008303403854370117, "loss": 0.0150146484375, "time": 1.172471284866333, "tflops": 6.8124661904157335, "tokens_per_sec": 112.58271456510425, "iter": 4180, "memory": 16130, "step": 4180} +{"base_lr": 1.721968541980134e-06, "lr": 1.721968541980134e-06, "data_time": 0.007853031158447266, "loss": 
0.07568359375, "time": 1.00337553024292, "tflops": 7.054537763893103, "tokens_per_sec": 116.6063915985248, "iter": 4190, "memory": 16131, "step": 4190} +{"base_lr": 1.7204315240137311e-06, "lr": 1.7204315240137311e-06, "data_time": 0.008785009384155273, "loss": 0.005615234375, "time": 0.9935007095336914, "tflops": 6.636795191097708, "tokens_per_sec": 109.71305702544534, "iter": 4200, "memory": 16130, "step": 4200} +{"base_lr": 1.7188909593256311e-06, "lr": 1.7188909593256311e-06, "data_time": 0.008547306060791016, "loss": 1.984375, "time": 1.0219590663909912, "tflops": 5.3259806479100815, "tokens_per_sec": 88.06614957460422, "iter": 4210, "memory": 16131, "step": 4210} +{"base_lr": 1.7173468555001131e-06, "lr": 1.7173468555001131e-06, "data_time": 0.008774042129516602, "loss": 1.8671875, "time": 1.0235540866851807, "tflops": 7.507524491285634, "tokens_per_sec": 124.0774685499721, "iter": 4220, "memory": 16131, "step": 4220} +{"base_lr": 1.7157992201388809e-06, "lr": 1.7157992201388809e-06, "data_time": 0.008774757385253906, "loss": 0.0027008056640625, "time": 1.2350270748138428, "tflops": 5.976657558230957, "tokens_per_sec": 98.78325948302827, "iter": 4230, "memory": 16131, "step": 4230} +{"base_lr": 1.714248060861024e-06, "lr": 1.714248060861024e-06, "data_time": 0.012374162673950195, "loss": 2.0, "time": 1.0690772533416748, "tflops": 5.544388840042112, "tokens_per_sec": 91.66783756139625, "iter": 4240, "memory": 16131, "step": 4240} +{"base_lr": 1.7126933853029786e-06, "lr": 1.7126933853029786e-06, "data_time": 0.008256196975708008, "loss": 1.734375, "time": 0.9947643280029297, "tflops": 3.7679107568654127, "tokens_per_sec": 62.32632016912761, "iter": 4250, "memory": 16130, "step": 4250} +{"base_lr": 1.7111352011184943e-06, "lr": 1.7111352011184943e-06, "data_time": 0.007447957992553711, "loss": 0.0308837890625, "time": 0.9993367195129395, "tflops": 6.840529995436285, "tokens_per_sec": 113.07500044125398, "iter": 4260, "memory": 16130, "step": 4260} +{"base_lr": 
1.709573515978592e-06, "lr": 1.709573515978592e-06, "data_time": 0.008538961410522461, "loss": 1.421875, "time": 1.0132074356079102, "tflops": 5.551271278616086, "tokens_per_sec": 91.78771960364614, "iter": 4270, "memory": 16131, "step": 4270} +{"base_lr": 1.7080083375715281e-06, "lr": 1.7080083375715281e-06, "data_time": 0.00844120979309082, "loss": 0.0026702880859375, "time": 0.9968059062957764, "tflops": 7.22260877451954, "tokens_per_sec": 119.3813151068654, "iter": 4280, "memory": 16130, "step": 4280} +{"base_lr": 1.7064396736027568e-06, "lr": 1.7064396736027568e-06, "data_time": 0.008430242538452148, "loss": 0.0164794921875, "time": 1.2332122325897217, "tflops": 6.083736663648504, "tokens_per_sec": 100.55041356466425, "iter": 4290, "memory": 16131, "step": 4290} +{"base_lr": 1.704867531794893e-06, "lr": 1.704867531794893e-06, "data_time": 0.008435249328613281, "loss": 0.00927734375, "time": 1.008124828338623, "tflops": 6.840997904307778, "tokens_per_sec": 113.08123438221183, "iter": 4300, "memory": 16131, "step": 4300} +{"base_lr": 1.7032919198876713e-06, "lr": 1.7032919198876713e-06, "data_time": 0.008733987808227539, "loss": 0.01806640625, "time": 1.0132105350494385, "tflops": 7.285093912830812, "tokens_per_sec": 120.40932834746603, "iter": 4310, "memory": 16131, "step": 4310} +{"base_lr": 1.7017128456379116e-06, "lr": 1.7017128456379116e-06, "data_time": 0.008687257766723633, "loss": 1.1796875, "time": 0.9960479736328125, "tflops": 4.978265425210311, "tokens_per_sec": 82.32535196155774, "iter": 4320, "memory": 16131, "step": 4320} +{"base_lr": 1.7001303168194767e-06, "lr": 1.7001303168194767e-06, "data_time": 0.008561372756958008, "loss": 1.4765625, "time": 1.017251968383789, "tflops": 4.934007112264881, "tokens_per_sec": 81.59237099515165, "iter": 4330, "memory": 16131, "step": 4330} +{"base_lr": 1.6985443412232395e-06, "lr": 1.6985443412232395e-06, "data_time": 0.008636474609375, "loss": 1.4453125, "time": 1.0084996223449707, "tflops": 5.457101015861364, 
"tokens_per_sec": 90.23305312531093, "iter": 4340, "memory": 16131, "step": 4340} +{"base_lr": 1.6969549266570392e-06, "lr": 1.6969549266570392e-06, "data_time": 0.00833439826965332, "loss": 1.296875, "time": 1.2023060321807861, "tflops": 4.1242338109156815, "tokens_per_sec": 68.20226947643042, "iter": 4350, "memory": 16131, "step": 4350} +{"base_lr": 1.6953620809456464e-06, "lr": 1.6953620809456464e-06, "data_time": 0.008415460586547852, "loss": 2.140625, "time": 1.2666497230529785, "tflops": 8.89201298458739, "tokens_per_sec": 146.84406952818918, "iter": 4360, "memory": 16132, "step": 4360} +{"base_lr": 1.693765811930723e-06, "lr": 1.693765811930723e-06, "data_time": 0.008328914642333984, "loss": 0.06591796875, "time": 1.0083966255187988, "tflops": 5.757911883418931, "tokens_per_sec": 95.20063591100855, "iter": 4370, "memory": 16131, "step": 4370} +{"base_lr": 1.6921661274707838e-06, "lr": 1.6921661274707838e-06, "data_time": 0.008327722549438477, "loss": 1.0234375, "time": 1.0175495147705078, "tflops": 4.218750302135372, "tokens_per_sec": 69.77547428337495, "iter": 4380, "memory": 16131, "step": 4380} +{"base_lr": 1.6905630354411579e-06, "lr": 1.6905630354411579e-06, "data_time": 0.00875234603881836, "loss": 0.109375, "time": 1.287663459777832, "tflops": 4.603205866845117, "tokens_per_sec": 76.10684240184341, "iter": 4390, "memory": 16130, "step": 4390} +{"base_lr": 1.6889565437339514e-06, "lr": 1.6889565437339514e-06, "data_time": 0.008693218231201172, "loss": 1.0390625, "time": 1.0156259536743164, "tflops": 4.286328941435953, "tokens_per_sec": 70.89224112425306, "iter": 4400, "memory": 16130, "step": 4400} +{"base_lr": 1.6873466602580052e-06, "lr": 1.6873466602580052e-06, "data_time": 0.008686065673828125, "loss": 0.1240234375, "time": 1.2553789615631104, "tflops": 5.204071381629861, "tokens_per_sec": 86.02979921333069, "iter": 4410, "memory": 16130, "step": 4410} +{"base_lr": 1.6857333929388583e-06, "lr": 1.6857333929388583e-06, "data_time": 
0.008541345596313477, "loss": 0.00433349609375, "time": 1.0613017082214355, "tflops": 6.954981645770577, "tokens_per_sec": 114.9531740642693, "iter": 4420, "memory": 16130, "step": 4420} +{"base_lr": 1.684116749718711e-06, "lr": 1.684116749718711e-06, "data_time": 0.008734703063964844, "loss": 2.40625, "time": 1.2403910160064697, "tflops": 4.583348462818731, "tokens_per_sec": 75.78255468389648, "iter": 4430, "memory": 16131, "step": 4430} +{"base_lr": 1.6824967385563808e-06, "lr": 1.6824967385563808e-06, "data_time": 0.008772134780883789, "loss": 2.453125, "time": 1.2300212383270264, "tflops": 3.0964411334499946, "tokens_per_sec": 51.21862780648909, "iter": 4440, "memory": 16131, "step": 4440} +{"base_lr": 1.6808733674272668e-06, "lr": 1.6808733674272668e-06, "data_time": 0.008307218551635742, "loss": 0.11865234375, "time": 1.039696216583252, "tflops": 7.332670203616353, "tokens_per_sec": 121.1892454643645, "iter": 4450, "memory": 16131, "step": 4450} +{"base_lr": 1.6792466443233088e-06, "lr": 1.6792466443233088e-06, "data_time": 0.008426666259765625, "loss": 1.640625, "time": 1.0170352458953857, "tflops": 6.125855337378077, "tokens_per_sec": 101.27475956766743, "iter": 4460, "memory": 16131, "step": 4460} +{"base_lr": 1.6776165772529487e-06, "lr": 1.6776165772529487e-06, "data_time": 0.008149862289428711, "loss": 0.043212890625, "time": 1.0083367824554443, "tflops": 7.440508572140753, "tokens_per_sec": 122.97478596180859, "iter": 4470, "memory": 16130, "step": 4470} +{"base_lr": 1.6759831742410923e-06, "lr": 1.6759831742410923e-06, "data_time": 0.008821725845336914, "loss": 0.00537109375, "time": 1.2789373397827148, "tflops": 5.629313384914961, "tokens_per_sec": 93.04599709327782, "iter": 4480, "memory": 16131, "step": 4480} +{"base_lr": 1.674346443329066e-06, "lr": 1.674346443329066e-06, "data_time": 0.008309125900268555, "loss": 2.265625, "time": 1.2379164695739746, "tflops": 6.305415748375948, "tokens_per_sec": 104.20735418787244, "iter": 4490, "memory": 16131, 
"step": 4490} +{"base_lr": 1.6727063925745806e-06, "lr": 1.6727063925745806e-06, "data_time": 0.008338212966918945, "loss": 0.00091552734375, "time": 1.0152435302734375, "tflops": 7.62867888779592, "tokens_per_sec": 126.07812429535939, "iter": 4500, "memory": 16131, "step": 4500} +{"base_lr": 1.6710630300516916e-06, "lr": 1.6710630300516916e-06, "data_time": 0.008545875549316406, "loss": 1.921875, "time": 1.0253896713256836, "tflops": 6.784902043296945, "tokens_per_sec": 112.15248526075861, "iter": 4510, "memory": 16130, "step": 4510} +{"base_lr": 1.6694163638507574e-06, "lr": 1.6694163638507574e-06, "data_time": 0.008337259292602539, "loss": 2.40625, "time": 1.2872488498687744, "tflops": 3.569940155568908, "tokens_per_sec": 59.04064315745055, "iter": 4520, "memory": 16131, "step": 4520} +{"base_lr": 1.6677664020784008e-06, "lr": 1.6677664020784008e-06, "data_time": 0.008243322372436523, "loss": 0.048095703125, "time": 1.2138235569000244, "tflops": 5.082815689047526, "tokens_per_sec": 84.031982589309, "iter": 4530, "memory": 16131, "step": 4530} +{"base_lr": 1.66611315285747e-06, "lr": 1.66611315285747e-06, "data_time": 0.008100509643554688, "loss": 1.8671875, "time": 1.0246777534484863, "tflops": 4.307528006455923, "tokens_per_sec": 71.24190971673973, "iter": 4540, "memory": 16130, "step": 4540} +{"base_lr": 1.6644566243269957e-06, "lr": 1.6644566243269957e-06, "data_time": 0.008301734924316406, "loss": 1.2421875, "time": 1.0426013469696045, "tflops": 4.523740763504861, "tokens_per_sec": 74.81287092774028, "iter": 4550, "memory": 16130, "step": 4550} +{"base_lr": 1.6627968246421536e-06, "lr": 1.6627968246421536e-06, "data_time": 0.008340120315551758, "loss": 0.0693359375, "time": 1.2300207614898682, "tflops": 5.360609298496749, "tokens_per_sec": 88.61639039969101, "iter": 4560, "memory": 16131, "step": 4560} +{"base_lr": 1.6611337619742239e-06, "lr": 1.6611337619742239e-06, "data_time": 0.008516788482666016, "loss": 0.054931640625, "time": 1.0092377662658691, 
"tflops": 7.01356093287565, "tokens_per_sec": 115.92907430800811, "iter": 4570, "memory": 16130, "step": 4570} +{"base_lr": 1.6594674445105502e-06, "lr": 1.6594674445105502e-06, "data_time": 0.008989334106445312, "loss": 1.09375, "time": 1.0415737628936768, "tflops": 4.179547406045982, "tokens_per_sec": 69.12616519823051, "iter": 4580, "memory": 16131, "step": 4580} +{"base_lr": 1.6577978804544996e-06, "lr": 1.6577978804544996e-06, "data_time": 0.00735163688659668, "loss": 0.00433349609375, "time": 1.0020225048065186, "tflops": 6.640810270741606, "tokens_per_sec": 109.77797352079453, "iter": 4590, "memory": 16131, "step": 4590} +{"base_lr": 1.6561250780254222e-06, "lr": 1.6561250780254222e-06, "data_time": 0.008406639099121094, "loss": 0.0179443359375, "time": 1.017164945602417, "tflops": 8.448648052429629, "tokens_per_sec": 139.60370991330294, "iter": 4600, "memory": 16131, "step": 4600} +{"base_lr": 1.6544490454586117e-06, "lr": 1.6544490454586117e-06, "data_time": 0.008746623992919922, "loss": 0.005523681640625, "time": 1.0270187854766846, "tflops": 7.423184050731904, "tokens_per_sec": 122.68519503408612, "iter": 4610, "memory": 16130, "step": 4610} +{"base_lr": 1.652769791005262e-06, "lr": 1.652769791005262e-06, "data_time": 0.008371591567993164, "loss": 0.08203125, "time": 1.293555498123169, "tflops": 5.425170184175864, "tokens_per_sec": 89.67531750142591, "iter": 4620, "memory": 16131, "step": 4620} +{"base_lr": 1.6510873229324305e-06, "lr": 1.6510873229324305e-06, "data_time": 0.008459806442260742, "loss": 0.001190185546875, "time": 1.0060455799102783, "tflops": 6.13256649792755, "tokens_per_sec": 101.38705644827267, "iter": 4630, "memory": 16131, "step": 4630} +{"base_lr": 1.6494016495229948e-06, "lr": 1.6494016495229948e-06, "data_time": 0.00842142105102539, "loss": 0.09716796875, "time": 1.0086803436279297, "tflops": 6.95736638899119, "tokens_per_sec": 115.00174533258648, "iter": 4640, "memory": 16131, "step": 4640} +{"base_lr": 1.6477127790756119e-06, 
"lr": 1.6477127790756119e-06, "data_time": 0.008763551712036133, "loss": 0.109375, "time": 0.9991722106933594, "tflops": 7.2661505934344195, "tokens_per_sec": 120.09941701301705, "iter": 4650, "memory": 16131, "step": 4650} +{"base_lr": 1.646020719904678e-06, "lr": 1.646020719904678e-06, "data_time": 0.008440971374511719, "loss": 1.9140625, "time": 1.0095245838165283, "tflops": 7.071590072210034, "tokens_per_sec": 116.88670280200775, "iter": 4660, "memory": 16131, "step": 4660} +{"base_lr": 1.6443254803402893e-06, "lr": 1.6443254803402893e-06, "data_time": 0.00832366943359375, "loss": 0.01055908203125, "time": 1.2294292449951172, "tflops": 5.560297864394457, "tokens_per_sec": 91.91256874677393, "iter": 4670, "memory": 16130, "step": 4670} +{"base_lr": 1.6426270687281973e-06, "lr": 1.6426270687281973e-06, "data_time": 0.008811712265014648, "loss": 1.9921875, "time": 1.0287632942199707, "tflops": 8.235525421011172, "tokens_per_sec": 136.0857262175307, "iter": 4680, "memory": 16131, "step": 4680} +{"base_lr": 1.6409254934297698e-06, "lr": 1.6409254934297698e-06, "data_time": 0.008209228515625, "loss": 2.09375, "time": 1.212629795074463, "tflops": 8.737559573604768, "tokens_per_sec": 144.31444840847706, "iter": 4690, "memory": 16132, "step": 4690} +{"base_lr": 1.6392207628219509e-06, "lr": 1.6392207628219509e-06, "data_time": 0.008422374725341797, "loss": 0.01141357421875, "time": 0.9878237247467041, "tflops": 6.674936596684859, "tokens_per_sec": 110.34357372600991, "iter": 4700, "memory": 16131, "step": 4700} +{"base_lr": 1.6375128852972175e-06, "lr": 1.6375128852972175e-06, "data_time": 0.008663654327392578, "loss": 2.390625, "time": 1.3465375900268555, "tflops": 3.5026561815317994, "tokens_per_sec": 57.926344260754306, "iter": 4710, "memory": 16131, "step": 4710} +{"base_lr": 1.6358018692635395e-06, "lr": 1.6358018692635395e-06, "data_time": 0.00868678092956543, "loss": 2.234375, "time": 1.0201289653778076, "tflops": 7.710976741258418, "tokens_per_sec": 
127.43486795488323, "iter": 4720, "memory": 16131, "step": 4720} +{"base_lr": 1.6340877231443357e-06, "lr": 1.6340877231443357e-06, "data_time": 0.008123397827148438, "loss": 0.00191497802734375, "time": 1.012152910232544, "tflops": 7.412455566363847, "tokens_per_sec": 122.51113319566335, "iter": 4730, "memory": 16131, "step": 4730} +{"base_lr": 1.6323704553784375e-06, "lr": 1.6323704553784375e-06, "data_time": 0.008501768112182617, "loss": 0.031005859375, "time": 1.0169177055358887, "tflops": 6.305258036248644, "tokens_per_sec": 104.23655662877515, "iter": 4740, "memory": 16130, "step": 4740} +{"base_lr": 1.630650074420043e-06, "lr": 1.630650074420043e-06, "data_time": 0.008292913436889648, "loss": 0.255859375, "time": 1.2293994426727295, "tflops": 5.609714465912837, "tokens_per_sec": 92.72820211474138, "iter": 4750, "memory": 16131, "step": 4750} +{"base_lr": 1.6289265887386772e-06, "lr": 1.6289265887386772e-06, "data_time": 0.008468389511108398, "loss": 0.00457763671875, "time": 1.0070765018463135, "tflops": 7.088780257985168, "tokens_per_sec": 117.17084033193974, "iter": 4760, "memory": 16131, "step": 4760} +{"base_lr": 1.6272000068191487e-06, "lr": 1.6272000068191487e-06, "data_time": 0.00832676887512207, "loss": 0.00335693359375, "time": 1.0331947803497314, "tflops": 6.440451953484638, "tokens_per_sec": 106.4658882255087, "iter": 4770, "memory": 16131, "step": 4770} +{"base_lr": 1.62547033716151e-06, "lr": 1.62547033716151e-06, "data_time": 0.008851289749145508, "loss": 1.703125, "time": 1.0168507099151611, "tflops": 6.782293379645297, "tokens_per_sec": 112.11085254530553, "iter": 4780, "memory": 16132, "step": 4780} +{"base_lr": 1.6237375882810138e-06, "lr": 1.6237375882810138e-06, "data_time": 0.008695363998413086, "loss": 1.2890625, "time": 1.0356526374816895, "tflops": 4.43720335664844, "tokens_per_sec": 73.38367831972069, "iter": 4790, "memory": 16131, "step": 4790} +{"base_lr": 1.6220017687080731e-06, "lr": 1.6220017687080731e-06, "data_time": 
0.008603096008300781, "loss": 0.004791259765625, "time": 1.2789020538330078, "tflops": 5.913778264196642, "tokens_per_sec": 97.74008855898208, "iter": 4800, "memory": 16131, "step": 4800} +{"base_lr": 1.620262886988217e-06, "lr": 1.620262886988217e-06, "data_time": 0.008403778076171875, "loss": 0.09521484375, "time": 1.0276727676391602, "tflops": 6.592961876366865, "tokens_per_sec": 108.98410810007648, "iter": 4810, "memory": 16131, "step": 4810} +{"base_lr": 1.6185209516820514e-06, "lr": 1.6185209516820514e-06, "data_time": 0.008565425872802734, "loss": 1.1796875, "time": 1.0440027713775635, "tflops": 3.937957140810341, "tokens_per_sec": 65.13392671382351, "iter": 4820, "memory": 16131, "step": 4820} +{"base_lr": 1.6167759713652132e-06, "lr": 1.6167759713652132e-06, "data_time": 0.008951902389526367, "loss": 1.6953125, "time": 1.0057437419891357, "tflops": 6.134406967866085, "tokens_per_sec": 101.41748413782365, "iter": 4830, "memory": 16130, "step": 4830} +{"base_lr": 1.6150279546283311e-06, "lr": 1.6150279546283311e-06, "data_time": 0.00864100456237793, "loss": 0.1943359375, "time": 1.0163516998291016, "tflops": 6.964469652242676, "tokens_per_sec": 115.11763105188716, "iter": 4840, "memory": 16131, "step": 4840} +{"base_lr": 1.6132769100769813e-06, "lr": 1.6132769100769813e-06, "data_time": 0.008035898208618164, "loss": 0.0003986358642578125, "time": 1.2628862857818604, "tflops": 5.029236930530772, "tokens_per_sec": 83.14287769378281, "iter": 4850, "memory": 16131, "step": 4850} +{"base_lr": 1.6115228463316475e-06, "lr": 1.6115228463316475e-06, "data_time": 0.008839130401611328, "loss": 1.7890625, "time": 1.2269055843353271, "tflops": 3.7948551567401516, "tokens_per_sec": 62.75951546968611, "iter": 4860, "memory": 16130, "step": 4860} +{"base_lr": 1.609765772027676e-06, "lr": 1.609765772027676e-06, "data_time": 0.008536100387573242, "loss": 0.003997802734375, "time": 1.0040104389190674, "tflops": 7.110428087037405, "tokens_per_sec": 117.52865849375333, "iter": 
4870, "memory": 16132, "step": 4870} +{"base_lr": 1.608005695815235e-06, "lr": 1.608005695815235e-06, "data_time": 0.008675098419189453, "loss": 1.8515625, "time": 1.0337257385253906, "tflops": 4.621137764879252, "tokens_per_sec": 76.4225916562908, "iter": 4880, "memory": 16130, "step": 4880} +{"base_lr": 1.6062426263592699e-06, "lr": 1.6062426263592699e-06, "data_time": 0.008306026458740234, "loss": 0.0272216796875, "time": 1.0329716205596924, "tflops": 7.321733741237498, "tokens_per_sec": 121.01010087009995, "iter": 4890, "memory": 16131, "step": 4890} +{"base_lr": 1.6044765723394647e-06, "lr": 1.6044765723394647e-06, "data_time": 0.008721113204956055, "loss": 0.0024261474609375, "time": 1.0012671947479248, "tflops": 8.280032328144847, "tokens_per_sec": 136.82661403318397, "iter": 4900, "memory": 16131, "step": 4900} +{"base_lr": 1.6027075424501925e-06, "lr": 1.6027075424501925e-06, "data_time": 0.009385824203491211, "loss": 1.5, "time": 1.0118558406829834, "tflops": 3.883659417633683, "tokens_per_sec": 64.23839976657347, "iter": 4910, "memory": 16131, "step": 4910} +{"base_lr": 1.600935545400481e-06, "lr": 1.600935545400481e-06, "data_time": 0.008954524993896484, "loss": 0.0556640625, "time": 1.2796037197113037, "tflops": 5.200236009735958, "tokens_per_sec": 85.96411397173146, "iter": 4920, "memory": 16131, "step": 4920} +{"base_lr": 1.5991605899139623e-06, "lr": 1.5991605899139623e-06, "data_time": 0.008689165115356445, "loss": 1.8671875, "time": 0.9901068210601807, "tflops": 6.108944374221466, "tokens_per_sec": 100.99920318983521, "iter": 4930, "memory": 16130, "step": 4930} +{"base_lr": 1.5973826847288346e-06, "lr": 1.5973826847288346e-06, "data_time": 0.008162736892700195, "loss": 1.9609375, "time": 1.2480368614196777, "tflops": 7.566030606733128, "tokens_per_sec": 124.99630805969987, "iter": 4940, "memory": 16131, "step": 4940} +{"base_lr": 1.5956018385978173e-06, "lr": 1.5956018385978173e-06, "data_time": 0.00853729248046875, "loss": 0.07177734375, "time": 
1.2269411087036133, "tflops": 5.324690550040771, "tokens_per_sec": 88.02378470636201, "iter": 4950, "memory": 16131, "step": 4950} +{"base_lr": 1.593818060288108e-06, "lr": 1.593818060288108e-06, "data_time": 0.009150266647338867, "loss": 1.8125, "time": 1.0135385990142822, "tflops": 6.445814430080223, "tokens_per_sec": 106.5573625956909, "iter": 4960, "memory": 16131, "step": 4960} +{"base_lr": 1.59203135858134e-06, "lr": 1.59203135858134e-06, "data_time": 0.008867979049682617, "loss": 0.039794921875, "time": 1.014521837234497, "tflops": 7.215945241384357, "tokens_per_sec": 119.26800938037643, "iter": 4970, "memory": 16131, "step": 4970} +{"base_lr": 1.5902417422735373e-06, "lr": 1.5902417422735373e-06, "data_time": 0.00845789909362793, "loss": 0.05908203125, "time": 1.0384385585784912, "tflops": 6.057923902823229, "tokens_per_sec": 100.1503643530576, "iter": 4980, "memory": 16130, "step": 4980} +{"base_lr": 1.5884492201750744e-06, "lr": 1.5884492201750744e-06, "data_time": 0.008820056915283203, "loss": 0.000579833984375, "time": 1.0358636379241943, "tflops": 9.174316790988941, "tokens_per_sec": 151.5643509935985, "iter": 4990, "memory": 16131, "step": 4990} +{"base_lr": 1.5866538011106306e-06, "lr": 1.5866538011106306e-06, "data_time": 0.008489847183227539, "loss": 1.671875, "time": 1.03572678565979, "tflops": 3.4436469725135788, "tokens_per_sec": 56.96482973775579, "iter": 5000, "memory": 16130, "step": 5000} +{"base_lr": 1.5848554939191467e-06, "lr": 1.5848554939191467e-06, "data_time": 0.008719921112060547, "loss": 0.03955078125, "time": 1.0356907844543457, "tflops": 6.36643757997664, "tokens_per_sec": 105.24376738306256, "iter": 5010, "memory": 16130, "step": 5010} +{"base_lr": 1.583054307453782e-06, "lr": 1.583054307453782e-06, "data_time": 0.009276866912841797, "loss": 2.078125, "time": 1.0138933658599854, "tflops": 4.233963321413607, "tokens_per_sec": 70.027088045603, "iter": 5020, "memory": 16130, "step": 5020} +{"base_lr": 1.5812502505818703e-06, "lr": 
1.5812502505818703e-06, "data_time": 0.00861668586730957, "loss": 0.0037384033203125, "time": 1.010505199432373, "tflops": 6.704970327942119, "tokens_per_sec": 110.83564939873882, "iter": 5030, "memory": 16131, "step": 5030} +{"base_lr": 1.5794433321848778e-06, "lr": 1.5794433321848778e-06, "data_time": 0.008245229721069336, "loss": 0.053955078125, "time": 1.0455687046051025, "tflops": 6.4221734330214835, "tokens_per_sec": 106.16232057348834, "iter": 5040, "memory": 16130, "step": 5040} +{"base_lr": 1.5776335611583575e-06, "lr": 1.5776335611583575e-06, "data_time": 0.008903026580810547, "loss": 0.05224609375, "time": 1.2639577388763428, "tflops": 7.566707772580133, "tokens_per_sec": 125.00417944379757, "iter": 5050, "memory": 16131, "step": 5050} +{"base_lr": 1.5758209464119047e-06, "lr": 1.5758209464119047e-06, "data_time": 0.008663654327392578, "loss": 0.00183868408203125, "time": 1.000227451324463, "tflops": 7.5614233122452434, "tokens_per_sec": 124.97157504960977, "iter": 5060, "memory": 16131, "step": 5060} +{"base_lr": 1.5740054968691165e-06, "lr": 1.5740054968691165e-06, "data_time": 0.008421659469604492, "loss": 0.00040435791015625, "time": 1.110774278640747, "tflops": 7.791233915821733, "tokens_per_sec": 128.73902713597235, "iter": 5070, "memory": 16131, "step": 5070} +{"base_lr": 1.572187221467545e-06, "lr": 1.572187221467545e-06, "data_time": 0.008634567260742188, "loss": 0.000926971435546875, "time": 1.0197668075561523, "tflops": 7.594841122480775, "tokens_per_sec": 125.51889221284179, "iter": 5080, "memory": 16130, "step": 5080} +{"base_lr": 1.570366129158654e-06, "lr": 1.570366129158654e-06, "data_time": 0.008478164672851562, "loss": 0.03515625, "time": 1.0162253379821777, "tflops": 9.590374455002351, "tokens_per_sec": 158.42942896850417, "iter": 5090, "memory": 16131, "step": 5090} +{"base_lr": 1.568542228907775e-06, "lr": 1.568542228907775e-06, "data_time": 0.008062124252319336, "loss": 1.7578125, "time": 1.0181171894073486, "tflops": 
4.394730758324735, "tokens_per_sec": 72.68318497107695, "iter": 5100, "memory": 16130, "step": 5100} +{"base_lr": 1.5667155296940637e-06, "lr": 1.5667155296940637e-06, "data_time": 0.008692741394042969, "loss": 1.8671875, "time": 1.2327461242675781, "tflops": 3.8259769148934883, "tokens_per_sec": 63.27336867214206, "iter": 5110, "memory": 16130, "step": 5110} +{"base_lr": 1.564886040510455e-06, "lr": 1.564886040510455e-06, "data_time": 0.008649587631225586, "loss": 0.048095703125, "time": 1.296377420425415, "tflops": 5.039490524932998, "tokens_per_sec": 83.30907210993828, "iter": 5120, "memory": 16130, "step": 5120} +{"base_lr": 1.5630537703636189e-06, "lr": 1.5630537703636189e-06, "data_time": 0.008417129516601562, "loss": 0.00946044921875, "time": 1.008056640625, "tflops": 6.540962546804272, "tokens_per_sec": 108.12884475649238, "iter": 5130, "memory": 16130, "step": 5130} +{"base_lr": 1.5612187282739156e-06, "lr": 1.5612187282739156e-06, "data_time": 0.008835554122924805, "loss": 0.00058746337890625, "time": 1.2506170272827148, "tflops": 6.338315311967782, "tokens_per_sec": 104.74829395576536, "iter": 5140, "memory": 16130, "step": 5140} +{"base_lr": 1.559380923275353e-06, "lr": 1.559380923275353e-06, "data_time": 0.008625984191894531, "loss": 0.08544921875, "time": 1.0358006954193115, "tflops": 7.009201464895669, "tokens_per_sec": 115.85240339243632, "iter": 5150, "memory": 16131, "step": 5150} +{"base_lr": 1.5575403644155398e-06, "lr": 1.5575403644155398e-06, "data_time": 0.009721755981445312, "loss": 1.2578125, "time": 1.0018398761749268, "tflops": 4.224496781496445, "tokens_per_sec": 69.87144519261254, "iter": 5160, "memory": 16132, "step": 5160} +{"base_lr": 1.5556970607556427e-06, "lr": 1.5556970607556427e-06, "data_time": 0.00914907455444336, "loss": 2.34375, "time": 1.2245135307312012, "tflops": 4.346084128767276, "tokens_per_sec": 71.86527367107178, "iter": 5170, "memory": 16131, "step": 5170} +{"base_lr": 1.5538510213703412e-06, "lr": 
1.5538510213703412e-06, "data_time": 0.009752511978149414, "loss": 0.01251220703125, "time": 1.022573471069336, "tflops": 7.573995516693074, "tokens_per_sec": 125.17437975974612, "iter": 5180, "memory": 16132, "step": 5180} +{"base_lr": 1.5520022553477826e-06, "lr": 1.5520022553477826e-06, "data_time": 0.010068893432617188, "loss": 0.0038299560546875, "time": 1.0443034172058105, "tflops": 8.6355530180996, "tokens_per_sec": 142.6788398323248, "iter": 5190, "memory": 16131, "step": 5190} +{"base_lr": 1.5501507717895379e-06, "lr": 1.5501507717895379e-06, "data_time": 0.008951902389526367, "loss": 1.65625, "time": 1.2280352115631104, "tflops": 3.4463745767431937, "tokens_per_sec": 57.001622869464114, "iter": 5200, "memory": 16131, "step": 5200} +{"base_lr": 1.5482965798105568e-06, "lr": 1.5482965798105568e-06, "data_time": 0.008887290954589844, "loss": 0.0004520416259765625, "time": 1.06292724609375, "tflops": 5.975342499550375, "tokens_per_sec": 98.7838070627839, "iter": 5210, "memory": 16131, "step": 5210} +{"base_lr": 1.546439688539122e-06, "lr": 1.546439688539122e-06, "data_time": 0.008635997772216797, "loss": 2.0, "time": 1.0285232067108154, "tflops": 7.648043985578293, "tokens_per_sec": 126.39481457653199, "iter": 5220, "memory": 16131, "step": 5220} +{"base_lr": 1.5445801071168056e-06, "lr": 1.5445801071168056e-06, "data_time": 0.008353710174560547, "loss": 0.138671875, "time": 1.2354676723480225, "tflops": 6.268854344565989, "tokens_per_sec": 103.6044915336641, "iter": 5230, "memory": 16131, "step": 5230} +{"base_lr": 1.542717844698423e-06, "lr": 1.542717844698423e-06, "data_time": 0.008089065551757812, "loss": 2.3125, "time": 1.0465097427368164, "tflops": 4.50684596689667, "tokens_per_sec": 74.5334675967192, "iter": 5240, "memory": 16131, "step": 5240} +{"base_lr": 1.5408529104519886e-06, "lr": 1.5408529104519886e-06, "data_time": 0.008582592010498047, "loss": 1.328125, "time": 1.0175118446350098, "tflops": 5.051742450898725, "tokens_per_sec": 
83.53711108926372, "iter": 5250, "memory": 16131, "step": 5250} +{"base_lr": 1.5389853135586707e-06, "lr": 1.5389853135586707e-06, "data_time": 0.009012699127197266, "loss": 0.0024261474609375, "time": 1.0402629375457764, "tflops": 7.794815502429055, "tokens_per_sec": 128.8135866072558, "iter": 5260, "memory": 16131, "step": 5260} +{"base_lr": 1.5371150632127452e-06, "lr": 1.5371150632127452e-06, "data_time": 0.008728981018066406, "loss": 1.4453125, "time": 1.0374343395233154, "tflops": 3.9045603569757863, "tokens_per_sec": 64.58240049266236, "iter": 5270, "memory": 16131, "step": 5270} +{"base_lr": 1.535242168621552e-06, "lr": 1.535242168621552e-06, "data_time": 0.008498430252075195, "loss": 2.3125, "time": 1.0380001068115234, "tflops": 6.6441032064340755, "tokens_per_sec": 109.82657829397498, "iter": 5280, "memory": 16131, "step": 5280} +{"base_lr": 1.533366639005446e-06, "lr": 1.533366639005446e-06, "data_time": 0.008565187454223633, "loss": 2.0625, "time": 1.0385355949401855, "tflops": 8.333178250925632, "tokens_per_sec": 137.6938842506389, "iter": 5290, "memory": 16131, "step": 5290} +{"base_lr": 1.5314884835977587e-06, "lr": 1.5314884835977587e-06, "data_time": 0.008499383926391602, "loss": 1.796875, "time": 1.140002727508545, "tflops": 4.668268498797466, "tokens_per_sec": 77.19279776833972, "iter": 5300, "memory": 16131, "step": 5300} +{"base_lr": 1.529607711644746e-06, "lr": 1.529607711644746e-06, "data_time": 0.008867740631103516, "loss": 0.0004138946533203125, "time": 1.0630199909210205, "tflops": 6.772722194121119, "tokens_per_sec": 111.94521365189398, "iter": 5310, "memory": 16131, "step": 5310} +{"base_lr": 1.5277243324055451e-06, "lr": 1.5277243324055451e-06, "data_time": 0.00824284553527832, "loss": 0.018798828125, "time": 1.3036584854125977, "tflops": 5.754987642405251, "tokens_per_sec": 95.1169354454513, "iter": 5320, "memory": 16131, "step": 5320} +{"base_lr": 1.5258383551521298e-06, "lr": 1.5258383551521298e-06, "data_time": 0.008987665176391602, 
"loss": 0.00022125244140625, "time": 1.0526700019836426, "tflops": 7.818120364410332, "tokens_per_sec": 129.19528412854316, "iter": 5330, "memory": 16131, "step": 5330} +{"base_lr": 1.5239497891692645e-06, "lr": 1.5239497891692645e-06, "data_time": 0.008301019668579102, "loss": 1.140625, "time": 1.0332872867584229, "tflops": 5.677819446043328, "tokens_per_sec": 93.87515093136359, "iter": 5340, "memory": 16131, "step": 5340} +{"base_lr": 1.5220586437544563e-06, "lr": 1.5220586437544563e-06, "data_time": 0.008466720581054688, "loss": 0.064453125, "time": 1.2318553924560547, "tflops": 5.4509836123919015, "tokens_per_sec": 90.10797913430387, "iter": 5350, "memory": 16131, "step": 5350} +{"base_lr": 1.5201649282179125e-06, "lr": 1.5201649282179125e-06, "data_time": 0.008908748626708984, "loss": 1.984375, "time": 1.0335254669189453, "tflops": 5.3835499950026655, "tokens_per_sec": 89.01570686416972, "iter": 5360, "memory": 16131, "step": 5360} +{"base_lr": 1.518268651882493e-06, "lr": 1.518268651882493e-06, "data_time": 0.008708477020263672, "loss": 2.34375, "time": 1.0282855033874512, "tflops": 5.999930367997534, "tokens_per_sec": 99.19424096117775, "iter": 5370, "memory": 16131, "step": 5370} +{"base_lr": 1.5163698240836636e-06, "lr": 1.5163698240836636e-06, "data_time": 0.008763790130615234, "loss": 1.078125, "time": 1.1359577178955078, "tflops": 4.8981013418132475, "tokens_per_sec": 80.98892991401087, "iter": 5380, "memory": 16132, "step": 5380} +{"base_lr": 1.514468454169452e-06, "lr": 1.514468454169452e-06, "data_time": 0.008684158325195312, "loss": 0.1513671875, "time": 1.039954423904419, "tflops": 6.6898782448881, "tokens_per_sec": 110.5817691203542, "iter": 5390, "memory": 16130, "step": 5390} +{"base_lr": 1.512564551500401e-06, "lr": 1.512564551500401e-06, "data_time": 0.008595466613769531, "loss": 0.000804901123046875, "time": 1.0064537525177002, "tflops": 6.792157899226457, "tokens_per_sec": 112.27540233936423, "iter": 5400, "memory": 16131, "step": 5400} 
+{"base_lr": 1.5106581254495223e-06, "lr": 1.5106581254495223e-06, "data_time": 0.0073549747467041016, "loss": 0.984375, "time": 1.225006341934204, "tflops": 3.899562708618757, "tokens_per_sec": 64.48946205062067, "iter": 5410, "memory": 16131, "step": 5410} +{"base_lr": 1.5087491854022497e-06, "lr": 1.5087491854022497e-06, "data_time": 0.008458137512207031, "loss": 0.00153350830078125, "time": 1.2500548362731934, "tflops": 5.8563303074763935, "tokens_per_sec": 96.795753665209, "iter": 5420, "memory": 16131, "step": 5420} +{"base_lr": 1.5068377407563947e-06, "lr": 1.5068377407563947e-06, "data_time": 0.012373208999633789, "loss": 0.0186767578125, "time": 1.2593166828155518, "tflops": 5.428334984085824, "tokens_per_sec": 89.73120227968982, "iter": 5430, "memory": 16130, "step": 5430} +{"base_lr": 1.504923800922097e-06, "lr": 1.504923800922097e-06, "data_time": 0.009055137634277344, "loss": 0.00799560546875, "time": 1.6278412342071533, "tflops": 4.45997779090024, "tokens_per_sec": 73.71726276387928, "iter": 5440, "memory": 16131, "step": 5440} +{"base_lr": 1.5030073753217813e-06, "lr": 1.5030073753217813e-06, "data_time": 0.008271217346191406, "loss": 2.25, "time": 1.2191014289855957, "tflops": 3.9184508649152963, "tokens_per_sec": 64.80182708478198, "iter": 5450, "memory": 16131, "step": 5450} +{"base_lr": 1.5010884733901115e-06, "lr": 1.5010884733901115e-06, "data_time": 0.008915424346923828, "loss": 1.3671875, "time": 1.02726411819458, "tflops": 5.0627192779353285, "tokens_per_sec": 83.7175157554043, "iter": 5460, "memory": 16131, "step": 5460} +{"base_lr": 1.499167104573941e-06, "lr": 1.499167104573941e-06, "data_time": 0.009148597717285156, "loss": 1.265625, "time": 1.0350439548492432, "tflops": 4.439812761405763, "tokens_per_sec": 73.42683336670099, "iter": 5470, "memory": 16131, "step": 5470} +{"base_lr": 1.4972432783322672e-06, "lr": 1.4972432783322672e-06, "data_time": 0.009204864501953125, "loss": 0.004852294921875, "time": 1.04787015914917, "tflops": 
6.986298788789983, "tokens_per_sec": 115.4723215881363, "iter": 5480, "memory": 16131, "step": 5480} +{"base_lr": 1.4953170041361865e-06, "lr": 1.4953170041361865e-06, "data_time": 0.008821249008178711, "loss": 0.012939453125, "time": 1.0244581699371338, "tflops": 6.5545121842559, "tokens_per_sec": 108.34995830693916, "iter": 5490, "memory": 16131, "step": 5490} +{"base_lr": 1.4933882914688467e-06, "lr": 1.4933882914688467e-06, "data_time": 0.00851893424987793, "loss": 0.08349609375, "time": 1.040855884552002, "tflops": 6.975159442748294, "tokens_per_sec": 115.2897358614966, "iter": 5500, "memory": 16131, "step": 5500} +{"base_lr": 1.4914571498254e-06, "lr": 1.4914571498254e-06, "data_time": 0.008973836898803711, "loss": 0.0001888275146484375, "time": 1.0045757293701172, "tflops": 8.494169624963801, "tokens_per_sec": 140.35776087111776, "iter": 5510, "memory": 16131, "step": 5510} +{"base_lr": 1.4895235887129566e-06, "lr": 1.4895235887129566e-06, "data_time": 0.008168697357177734, "loss": 0.000659942626953125, "time": 1.0220816135406494, "tflops": 8.467329925432946, "tokens_per_sec": 139.91054931952633, "iter": 5520, "memory": 16131, "step": 5520} +{"base_lr": 1.4875876176505379e-06, "lr": 1.4875876176505379e-06, "data_time": 0.008634567260742188, "loss": 2.53125, "time": 1.0240187644958496, "tflops": 6.971497273376344, "tokens_per_sec": 115.2322634028871, "iter": 5530, "memory": 16131, "step": 5530} +{"base_lr": 1.4856492461690302e-06, "lr": 1.4856492461690302e-06, "data_time": 0.008304119110107422, "loss": 0.1640625, "time": 1.030782699584961, "tflops": 6.984536205549308, "tokens_per_sec": 115.44625268526455, "iter": 5540, "memory": 16131, "step": 5540} +{"base_lr": 1.4837084838111368e-06, "lr": 1.4837084838111368e-06, "data_time": 0.008736848831176758, "loss": 0.01190185546875, "time": 1.2763559818267822, "tflops": 5.0710808115798285, "tokens_per_sec": 83.832411586909, "iter": 5550, "memory": 16131, "step": 5550} +{"base_lr": 1.4817653401313303e-06, "lr": 
1.4817653401313303e-06, "data_time": 0.008872270584106445, "loss": 0.0174560546875, "time": 1.0205974578857422, "tflops": 7.113613399266052, "tokens_per_sec": 117.57818821974436, "iter": 5560, "memory": 16131, "step": 5560} +{"base_lr": 1.4798198246958083e-06, "lr": 1.4798198246958083e-06, "data_time": 0.008346319198608398, "loss": 0.021240234375, "time": 1.0240068435668945, "tflops": 5.847561447742397, "tokens_per_sec": 96.67904137735971, "iter": 5570, "memory": 16131, "step": 5570} +{"base_lr": 1.477871947082444e-06, "lr": 1.477871947082444e-06, "data_time": 0.008388519287109375, "loss": 0.09228515625, "time": 1.0045831203460693, "tflops": 5.900338033281082, "tokens_per_sec": 97.55290330395196, "iter": 5580, "memory": 16130, "step": 5580} +{"base_lr": 1.47592171688074e-06, "lr": 1.47592171688074e-06, "data_time": 0.00835418701171875, "loss": 1.7890625, "time": 1.0130183696746826, "tflops": 7.346298554248856, "tokens_per_sec": 121.41931842694855, "iter": 5590, "memory": 16131, "step": 5590} +{"base_lr": 1.4739691436917794e-06, "lr": 1.4739691436917794e-06, "data_time": 0.00833749771118164, "loss": 0.02099609375, "time": 1.2255518436431885, "tflops": 5.281297859212511, "tokens_per_sec": 87.30760804196957, "iter": 5600, "memory": 16131, "step": 5600} +{"base_lr": 1.4720142371281825e-06, "lr": 1.4720142371281825e-06, "data_time": 0.008711576461791992, "loss": 0.0546875, "time": 1.0496411323547363, "tflops": 6.801318855096585, "tokens_per_sec": 112.4193749297625, "iter": 5610, "memory": 16131, "step": 5610} +{"base_lr": 1.4700570068140546e-06, "lr": 1.4700570068140546e-06, "data_time": 0.008648395538330078, "loss": 0.00067901611328125, "time": 1.0051770210266113, "tflops": 8.4890884569531, "tokens_per_sec": 140.27379958990014, "iter": 5620, "memory": 16131, "step": 5620} +{"base_lr": 1.4680974623849429e-06, "lr": 1.4680974623849429e-06, "data_time": 0.008279085159301758, "loss": 2.40625, "time": 1.0310735702514648, "tflops": 7.041336293686889, "tokens_per_sec": 
116.38354765568982, "iter": 5630, "memory": 16131, "step": 5630} +{"base_lr": 1.466135613487785e-06, "lr": 1.466135613487785e-06, "data_time": 0.008427143096923828, "loss": 0.00885009765625, "time": 1.2960395812988281, "tflops": 5.555030254624951, "tokens_per_sec": 91.81818342357425, "iter": 5640, "memory": 16130, "step": 5640} +{"base_lr": 1.4641714697808653e-06, "lr": 1.4641714697808653e-06, "data_time": 0.008579015731811523, "loss": 1.3515625, "time": 1.0333483219146729, "tflops": 3.802883675927318, "tokens_per_sec": 62.902313403383424, "iter": 5650, "memory": 16131, "step": 5650} +{"base_lr": 1.462205040933765e-06, "lr": 1.462205040933765e-06, "data_time": 0.008147954940795898, "loss": 0.0269775390625, "time": 1.0213727951049805, "tflops": 7.226875374672664, "tokens_per_sec": 119.44708199060749, "iter": 5660, "memory": 16130, "step": 5660} +{"base_lr": 1.4602363366273146e-06, "lr": 1.4602363366273146e-06, "data_time": 0.008388042449951172, "loss": 0.000972747802734375, "time": 1.3172636032104492, "tflops": 5.00557421864299, "tokens_per_sec": 82.74729502451997, "iter": 5670, "memory": 16130, "step": 5670} +{"base_lr": 1.4582653665535475e-06, "lr": 1.4582653665535475e-06, "data_time": 0.008216619491577148, "loss": 0.0155029296875, "time": 1.0094718933105469, "tflops": 7.252043441907548, "tokens_per_sec": 119.86465477811628, "iter": 5680, "memory": 16130, "step": 5680} +{"base_lr": 1.4562921404156516e-06, "lr": 1.4562921404156516e-06, "data_time": 0.00832819938659668, "loss": 0.007354736328125, "time": 1.2972602844238281, "tflops": 5.5965143147035885, "tokens_per_sec": 92.50263917021472, "iter": 5690, "memory": 16131, "step": 5690} +{"base_lr": 1.4543166679279218e-06, "lr": 1.4543166679279218e-06, "data_time": 0.008437156677246094, "loss": 0.010498046875, "time": 1.3422937393188477, "tflops": 4.867102881882151, "tokens_per_sec": 80.45928907835372, "iter": 5700, "memory": 16131, "step": 5700} +{"base_lr": 1.4523389588157109e-06, "lr": 1.4523389588157109e-06, 
"data_time": 0.008423089981079102, "loss": 0.06396484375, "time": 1.2344603538513184, "tflops": 7.1088344100274306, "tokens_per_sec": 117.46023235781188, "iter": 5710, "memory": 16130, "step": 5710} +{"base_lr": 1.4503590228153828e-06, "lr": 1.4503590228153828e-06, "data_time": 0.008768796920776367, "loss": 0.0029449462890625, "time": 1.0437438488006592, "tflops": 7.420371285797883, "tokens_per_sec": 122.63545327424833, "iter": 5720, "memory": 16131, "step": 5720} +{"base_lr": 1.4483768696742662e-06, "lr": 1.4483768696742662e-06, "data_time": 0.00807332992553711, "loss": 1.4453125, "time": 1.0345616340637207, "tflops": 5.84644480835408, "tokens_per_sec": 96.65929675654696, "iter": 5730, "memory": 16130, "step": 5730} +{"base_lr": 1.4463925091506027e-06, "lr": 1.4463925091506027e-06, "data_time": 0.008626699447631836, "loss": 1.7890625, "time": 1.0517644882202148, "tflops": 4.023970556029876, "tokens_per_sec": 66.55482361682198, "iter": 5740, "memory": 16131, "step": 5740} +{"base_lr": 1.4444059510135027e-06, "lr": 1.4444059510135027e-06, "data_time": 0.0088348388671875, "loss": 2.125, "time": 1.045423984527588, "tflops": 5.727765030541304, "tokens_per_sec": 94.69842041613573, "iter": 5750, "memory": 16131, "step": 5750} +{"base_lr": 1.4424172050428942e-06, "lr": 1.4424172050428942e-06, "data_time": 0.00853729248046875, "loss": 2.140625, "time": 1.2557094097137451, "tflops": 3.2258458555249567, "tokens_per_sec": 53.356293646967366, "iter": 5760, "memory": 16131, "step": 5760} +{"base_lr": 1.4404262810294765e-06, "lr": 1.4404262810294765e-06, "data_time": 0.008437395095825195, "loss": 0.048828125, "time": 1.3287339210510254, "tflops": 5.19033926107014, "tokens_per_sec": 85.79595823800484, "iter": 5770, "memory": 16130, "step": 5770} +{"base_lr": 1.4384331887746717e-06, "lr": 1.4384331887746717e-06, "data_time": 0.008423566818237305, "loss": 0.0703125, "time": 1.2526895999908447, "tflops": 4.683377789797404, "tokens_per_sec": 77.43338812793806, "iter": 5780, "memory": 
16131, "step": 5780} +{"base_lr": 1.436437938090576e-06, "lr": 1.436437938090576e-06, "data_time": 0.008396387100219727, "loss": 0.039794921875, "time": 1.0186660289764404, "tflops": 6.472838539644698, "tokens_per_sec": 107.00268478513671, "iter": 5790, "memory": 16130, "step": 5790} +{"base_lr": 1.4344405387999126e-06, "lr": 1.4344405387999126e-06, "data_time": 0.008224248886108398, "loss": 1.890625, "time": 0.99997878074646, "tflops": 5.14031685352145, "tokens_per_sec": 85.00180367473854, "iter": 5800, "memory": 16131, "step": 5800} +{"base_lr": 1.4324410007359803e-06, "lr": 1.4324410007359803e-06, "data_time": 0.008633613586425781, "loss": 2.265625, "time": 1.0047602653503418, "tflops": 4.8146027034901, "tokens_per_sec": 79.62098299341666, "iter": 5810, "memory": 16130, "step": 5810} +{"base_lr": 1.4304393337426096e-06, "lr": 1.4304393337426096e-06, "data_time": 0.007999897003173828, "loss": 0.09716796875, "time": 1.0123302936553955, "tflops": 6.932281651527819, "tokens_per_sec": 114.58710731753784, "iter": 5820, "memory": 16131, "step": 5820} +{"base_lr": 1.4284355476741106e-06, "lr": 1.4284355476741106e-06, "data_time": 0.009099721908569336, "loss": 0.01422119140625, "time": 1.0132906436920166, "tflops": 6.447391740602356, "tokens_per_sec": 106.58343750849765, "iter": 5830, "memory": 16131, "step": 5830} +{"base_lr": 1.4264296523952254e-06, "lr": 1.4264296523952254e-06, "data_time": 0.008516311645507812, "loss": 0.035888671875, "time": 1.018162727355957, "tflops": 7.249660297890852, "tokens_per_sec": 119.8236752554271, "iter": 5840, "memory": 16130, "step": 5840} +{"base_lr": 1.4244216577810804e-06, "lr": 1.4244216577810804e-06, "data_time": 0.008328437805175781, "loss": 0.005523681640625, "time": 1.0046086311340332, "tflops": 6.201629765351453, "tokens_per_sec": 102.5274886237319, "iter": 5850, "memory": 16130, "step": 5850} +{"base_lr": 1.422411573717138e-06, "lr": 1.422411573717138e-06, "data_time": 0.008368492126464844, "loss": 1.6953125, "time": 
1.196803331375122, "tflops": 4.092615226102406, "tokens_per_sec": 67.68029289061525, "iter": 5860, "memory": 16130, "step": 5860} +{"base_lr": 1.4203994100991454e-06, "lr": 1.4203994100991454e-06, "data_time": 0.008900880813598633, "loss": 0.2314453125, "time": 1.014218807220459, "tflops": 5.903995171492128, "tokens_per_sec": 97.61207275500948, "iter": 5870, "memory": 16131, "step": 5870} +{"base_lr": 1.4183851768330879e-06, "lr": 1.4183851768330879e-06, "data_time": 0.00913095474243164, "loss": 0.01226806640625, "time": 1.015669822692871, "tflops": 8.043242243205121, "tokens_per_sec": 132.9172108726615, "iter": 5880, "memory": 16130, "step": 5880} +{"base_lr": 1.416368883835141e-06, "lr": 1.416368883835141e-06, "data_time": 0.008488893508911133, "loss": 2.453125, "time": 1.022817611694336, "tflops": 5.676729503763663, "tokens_per_sec": 93.85837602158465, "iter": 5890, "memory": 16131, "step": 5890} +{"base_lr": 1.414350541031619e-06, "lr": 1.414350541031619e-06, "data_time": 0.009112119674682617, "loss": 1.78125, "time": 1.0127575397491455, "tflops": 4.477750263818115, "tokens_per_sec": 74.05523736560187, "iter": 5900, "memory": 16130, "step": 5900} +{"base_lr": 1.412330158358928e-06, "lr": 1.412330158358928e-06, "data_time": 0.008639335632324219, "loss": 0.06201171875, "time": 1.0379247665405273, "tflops": 5.3023923226518415, "tokens_per_sec": 87.67494806316397, "iter": 5910, "memory": 16130, "step": 5910} +{"base_lr": 1.4103077457635167e-06, "lr": 1.4103077457635167e-06, "data_time": 0.01172947883605957, "loss": 0.0016326904296875, "time": 1.0150878429412842, "tflops": 7.032833733761803, "tokens_per_sec": 116.2460971436432, "iter": 5920, "memory": 16131, "step": 5920} +{"base_lr": 1.4082833132018268e-06, "lr": 1.4082833132018268e-06, "data_time": 0.008364439010620117, "loss": 1.703125, "time": 1.0107147693634033, "tflops": 6.044310263321613, "tokens_per_sec": 99.92928080344043, "iter": 5930, "memory": 16131, "step": 5930} +{"base_lr": 1.4062568706402451e-06, 
"lr": 1.4062568706402451e-06, "data_time": 0.0085906982421875, "loss": 0.0264892578125, "time": 1.006300449371338, "tflops": 6.732986537627047, "tokens_per_sec": 111.29876774859636, "iter": 5940, "memory": 16131, "step": 5940} +{"base_lr": 1.4042284280550534e-06, "lr": 1.4042284280550534e-06, "data_time": 0.008822441101074219, "loss": 0.0751953125, "time": 1.0154716968536377, "tflops": 6.851169262234889, "tokens_per_sec": 113.247863388222, "iter": 5950, "memory": 16131, "step": 5950} +{"base_lr": 1.402197995432379e-06, "lr": 1.402197995432379e-06, "data_time": 0.008704900741577148, "loss": 0.01361083984375, "time": 1.0038094520568848, "tflops": 5.361959585175956, "tokens_per_sec": 88.66224542670255, "iter": 5960, "memory": 16131, "step": 5960} +{"base_lr": 1.4001655827681465e-06, "lr": 1.4001655827681465e-06, "data_time": 0.00882863998413086, "loss": 0.01177978515625, "time": 1.0384149551391602, "tflops": 6.2913979567004565, "tokens_per_sec": 104.0046654426531, "iter": 5970, "memory": 16130, "step": 5970} +{"base_lr": 1.3981312000680293e-06, "lr": 1.3981312000680293e-06, "data_time": 0.008381366729736328, "loss": 1.96875, "time": 1.0334961414337158, "tflops": 5.207939824216552, "tokens_per_sec": 86.11546423042158, "iter": 5980, "memory": 16130, "step": 5980} +{"base_lr": 1.396094857347399e-06, "lr": 1.396094857347399e-06, "data_time": 0.008638858795166016, "loss": 0.048828125, "time": 1.1932485103607178, "tflops": 5.728892804822155, "tokens_per_sec": 94.69946873492891, "iter": 5990, "memory": 16131, "step": 5990} +{"base_lr": 1.394056564631276e-06, "lr": 1.394056564631276e-06, "data_time": 0.008797645568847656, "loss": 1.65625, "time": 1.102278470993042, "tflops": 3.5650732263836806, "tokens_per_sec": 58.96876489058393, "iter": 6000, "memory": 16131, "step": 6000} +{"base_lr": 1.3920163319542807e-06, "lr": 1.3920163319542807e-06, "data_time": 0.008618831634521484, "loss": 0.0296630859375, "time": 0.9960112571716309, "tflops": 6.6200664740571025, "tokens_per_sec": 
109.43651411071139, "iter": 6010, "memory": 16132, "step": 6010} +{"base_lr": 1.3899741693605843e-06, "lr": 1.3899741693605843e-06, "data_time": 0.008430957794189453, "loss": 0.019775390625, "time": 0.9893791675567627, "tflops": 6.848140329411608, "tokens_per_sec": 113.20230268893461, "iter": 6020, "memory": 16131, "step": 6020} +{"base_lr": 1.3879300869038589e-06, "lr": 1.3879300869038589e-06, "data_time": 0.008415460586547852, "loss": 1.609375, "time": 1.0048258304595947, "tflops": 4.392630032761775, "tokens_per_sec": 72.64940628221913, "iter": 6030, "memory": 16131, "step": 6030} +{"base_lr": 1.3858840946472285e-06, "lr": 1.3858840946472285e-06, "data_time": 0.008335590362548828, "loss": 1.9921875, "time": 1.016373872756958, "tflops": 7.679829452179199, "tokens_per_sec": 126.92179861919807, "iter": 6040, "memory": 16131, "step": 6040} +{"base_lr": 1.3838362026632188e-06, "lr": 1.3838362026632188e-06, "data_time": 0.008728981018066406, "loss": 2.140625, "time": 0.997981071472168, "tflops": 4.36211371472795, "tokens_per_sec": 72.14565692485262, "iter": 6050, "memory": 16131, "step": 6050} +{"base_lr": 1.3817864210337083e-06, "lr": 1.3817864210337083e-06, "data_time": 0.008628368377685547, "loss": 0.00518798828125, "time": 1.295414924621582, "tflops": 5.370610098680903, "tokens_per_sec": 88.77464495285565, "iter": 6060, "memory": 16131, "step": 6060} +{"base_lr": 1.379734759849878e-06, "lr": 1.379734759849878e-06, "data_time": 0.008481740951538086, "loss": 0.0849609375, "time": 1.0109515190124512, "tflops": 7.181487554185474, "tokens_per_sec": 118.70005410061937, "iter": 6070, "memory": 16131, "step": 6070} +{"base_lr": 1.3776812292121618e-06, "lr": 1.3776812292121618e-06, "data_time": 0.009057283401489258, "loss": 0.0478515625, "time": 1.2563891410827637, "tflops": 5.392761809917841, "tokens_per_sec": 89.14435530968422, "iter": 6080, "memory": 16130, "step": 6080} +{"base_lr": 1.3756258392301975e-06, "lr": 1.3756258392301975e-06, "data_time": 0.008758306503295898, 
"loss": 2.4375, "time": 1.3520748615264893, "tflops": 4.607888931784072, "tokens_per_sec": 76.17921383704824, "iter": 6090, "memory": 16130, "step": 6090} +{"base_lr": 1.3735686000227766e-06, "lr": 1.3735686000227766e-06, "data_time": 0.008780956268310547, "loss": 0.0111083984375, "time": 1.0023572444915771, "tflops": 6.457282933559853, "tokens_per_sec": 106.74836799744641, "iter": 6100, "memory": 16133, "step": 6100} +{"base_lr": 1.3715095217177938e-06, "lr": 1.3715095217177938e-06, "data_time": 0.008749008178710938, "loss": 0.0174560546875, "time": 1.0273091793060303, "tflops": 8.07013595188973, "tokens_per_sec": 133.3580997420982, "iter": 6110, "memory": 16130, "step": 6110} +{"base_lr": 1.3694486144521995e-06, "lr": 1.3694486144521995e-06, "data_time": 0.008884191513061523, "loss": 2.546875, "time": 1.0408477783203125, "tflops": 5.3456769934655775, "tokens_per_sec": 88.38948587504152, "iter": 6120, "memory": 16131, "step": 6120} +{"base_lr": 1.3673858883719458e-06, "lr": 1.3673858883719458e-06, "data_time": 0.00848531723022461, "loss": 0.007720947265625, "time": 1.0491399765014648, "tflops": 7.09336747359995, "tokens_per_sec": 117.23888399529595, "iter": 6130, "memory": 16131, "step": 6130} +{"base_lr": 1.3653213536319405e-06, "lr": 1.3653213536319405e-06, "data_time": 0.008714437484741211, "loss": 2.34375, "time": 0.9870307445526123, "tflops": 4.83975709489415, "tokens_per_sec": 80.03803370454078, "iter": 6140, "memory": 16131, "step": 6140} +{"base_lr": 1.3632550203959943e-06, "lr": 1.3632550203959943e-06, "data_time": 0.00822591781616211, "loss": 0.064453125, "time": 0.9959478378295898, "tflops": 4.431786437922655, "tokens_per_sec": 73.29701137663122, "iter": 6150, "memory": 16131, "step": 6150} +{"base_lr": 1.3611868988367727e-06, "lr": 1.3611868988367727e-06, "data_time": 0.008795738220214844, "loss": 1.953125, "time": 1.006230354309082, "tflops": 3.9655069141060393, "tokens_per_sec": 65.59134269533405, "iter": 6160, "memory": 16130, "step": 6160} 
+{"base_lr": 1.359116999135746e-06, "lr": 1.359116999135746e-06, "data_time": 0.008676528930664062, "loss": 1.7265625, "time": 1.0106127262115479, "tflops": 4.4273645203741205, "tokens_per_sec": 73.22290535300128, "iter": 6170, "memory": 16130, "step": 6170} +{"base_lr": 1.3570453314831373e-06, "lr": 1.3570453314831373e-06, "data_time": 0.008481740951538086, "loss": 0.0130615234375, "time": 1.2077956199645996, "tflops": 5.158331994574917, "tokens_per_sec": 85.27932896704299, "iter": 6180, "memory": 16131, "step": 6180} +{"base_lr": 1.3549719060778731e-06, "lr": 1.3549719060778731e-06, "data_time": 0.008807182312011719, "loss": 0.00994873046875, "time": 1.009511947631836, "tflops": 7.071678588169088, "tokens_per_sec": 116.88816588718286, "iter": 6190, "memory": 16130, "step": 6190} +{"base_lr": 1.3528967331275351e-06, "lr": 1.3528967331275351e-06, "data_time": 0.008340597152709961, "loss": 0.00286865234375, "time": 1.244638204574585, "tflops": 6.173968745055777, "tokens_per_sec": 102.0376841503964, "iter": 6200, "memory": 16131, "step": 6200} +{"base_lr": 1.3508198228483064e-06, "lr": 1.3508198228483064e-06, "data_time": 0.008597612380981445, "loss": 0.0216064453125, "time": 1.0065340995788574, "tflops": 7.152801964997704, "tokens_per_sec": 118.22748980851458, "iter": 6210, "memory": 16131, "step": 6210} +{"base_lr": 1.3487411854649244e-06, "lr": 1.3487411854649244e-06, "data_time": 0.008760213851928711, "loss": 1.6953125, "time": 1.0075984001159668, "tflops": 4.380543002116341, "tokens_per_sec": 72.44949971290725, "iter": 6220, "memory": 16130, "step": 6220} +{"base_lr": 1.3466608312106281e-06, "lr": 1.3466608312106281e-06, "data_time": 0.008752822875976562, "loss": 0.2197265625, "time": 0.9965662956237793, "tflops": 6.373238163285597, "tokens_per_sec": 105.3617812091188, "iter": 6230, "memory": 16130, "step": 6230} +{"base_lr": 1.3445787703271096e-06, "lr": 1.3445787703271096e-06, "data_time": 0.008851289749145508, "loss": 0.0361328125, "time": 0.9942753314971924, 
"tflops": 6.814417660571636, "tokens_per_sec": 112.64485445016155, "iter": 6240, "memory": 16130, "step": 6240} +{"base_lr": 1.3424950130644623e-06, "lr": 1.3424950130644623e-06, "data_time": 0.008968830108642578, "loss": 1.453125, "time": 1.0406074523925781, "tflops": 3.7763552970152174, "tokens_per_sec": 62.463515757540165, "iter": 6250, "memory": 16130, "step": 6250} +{"base_lr": 1.340409569681131e-06, "lr": 1.340409569681131e-06, "data_time": 0.008655309677124023, "loss": 1.3515625, "time": 1.0366618633270264, "tflops": 4.608049372527596, "tokens_per_sec": 76.2061408783612, "iter": 6260, "memory": 16131, "step": 6260} +{"base_lr": 1.3383224504438617e-06, "lr": 1.3383224504438617e-06, "data_time": 0.008957386016845703, "loss": 0.000820159912109375, "time": 1.009840726852417, "tflops": 7.549457320774725, "tokens_per_sec": 124.77215133975228, "iter": 6270, "memory": 16130, "step": 6270} +{"base_lr": 1.3362336656276498e-06, "lr": 1.3362336656276498e-06, "data_time": 0.008631706237792969, "loss": 0.0047607421875, "time": 1.0822553634643555, "tflops": 6.260451652313654, "tokens_per_sec": 103.48759061944374, "iter": 6280, "memory": 16130, "step": 6280} +{"base_lr": 1.334143225515692e-06, "lr": 1.334143225515692e-06, "data_time": 0.008478403091430664, "loss": 1.7421875, "time": 1.2590675354003906, "tflops": 3.361431546347315, "tokens_per_sec": 55.59669996390146, "iter": 6290, "memory": 16131, "step": 6290} +{"base_lr": 1.332051140399333e-06, "lr": 1.332051140399333e-06, "data_time": 0.00862431526184082, "loss": 0.061767578125, "time": 1.0123648643493652, "tflops": 6.572967490046218, "tokens_per_sec": 108.65647739620736, "iter": 6300, "memory": 16131, "step": 6300} +{"base_lr": 1.329957420578016e-06, "lr": 1.329957420578016e-06, "data_time": 0.008659124374389648, "loss": 2.09375, "time": 1.000124216079712, "tflops": 5.3817172173317624, "tokens_per_sec": 88.98894614188356, "iter": 6310, "memory": 16131, "step": 6310} +{"base_lr": 1.3278620763592319e-06, "lr": 
1.3278620763592319e-06, "data_time": 0.008481025695800781, "loss": 1.859375, "time": 1.0015993118286133, "tflops": 5.73652628772715, "tokens_per_sec": 94.84830797903032, "iter": 6320, "memory": 16131, "step": 6320} +{"base_lr": 1.3257651180584692e-06, "lr": 1.3257651180584692e-06, "data_time": 0.008529186248779297, "loss": 1.734375, "time": 1.027479648590088, "tflops": 8.77695002391332, "tokens_per_sec": 145.01503772294996, "iter": 6330, "memory": 16131, "step": 6330} +{"base_lr": 1.3236665559991618e-06, "lr": 1.3236665559991618e-06, "data_time": 0.009041786193847656, "loss": 0.921875, "time": 1.013559341430664, "tflops": 3.5189625944880816, "tokens_per_sec": 58.21070122708536, "iter": 6340, "memory": 16130, "step": 6340} +{"base_lr": 1.321566400512639e-06, "lr": 1.321566400512639e-06, "data_time": 0.00879526138305664, "loss": 1.390625, "time": 1.0352580547332764, "tflops": 3.971220653415971, "tokens_per_sec": 65.68410618882247, "iter": 6350, "memory": 16130, "step": 6350} +{"base_lr": 1.3194646619380757e-06, "lr": 1.3194646619380757e-06, "data_time": 0.00854802131652832, "loss": 1.828125, "time": 1.0195212364196777, "tflops": 9.499869819919686, "tokens_per_sec": 156.93640729027476, "iter": 6360, "memory": 16131, "step": 6360} +{"base_lr": 1.3173613506224397e-06, "lr": 1.3173613506224397e-06, "data_time": 0.009057044982910156, "loss": 2.03125, "time": 1.0132884979248047, "tflops": 7.882672115337539, "tokens_per_sec": 130.26892170413774, "iter": 6370, "memory": 16131, "step": 6370} +{"base_lr": 1.3152564769204413e-06, "lr": 1.3152564769204413e-06, "data_time": 0.009070158004760742, "loss": 2.734375, "time": 0.9920988082885742, "tflops": 5.730421414691003, "tokens_per_sec": 94.74862706675407, "iter": 6380, "memory": 16133, "step": 6380} +{"base_lr": 1.3131500511944836e-06, "lr": 1.3131500511944836e-06, "data_time": 0.008321285247802734, "loss": 1.9296875, "time": 1.0182573795318604, "tflops": 2.968057750375938, "tokens_per_sec": 49.10349878626776, "iter": 6390, 
"memory": 16131, "step": 6390} +{"base_lr": 1.3110420838146082e-06, "lr": 1.3110420838146082e-06, "data_time": 0.008952140808105469, "loss": 0.000514984130859375, "time": 1.274562120437622, "tflops": 4.935641554659665, "tokens_per_sec": 81.59665059260502, "iter": 6400, "memory": 16131, "step": 6400} +{"base_lr": 1.308932585158449e-06, "lr": 1.308932585158449e-06, "data_time": 0.008725404739379883, "loss": 1.2890625, "time": 1.0198712348937988, "tflops": 4.802621516377252, "tokens_per_sec": 79.4217909365345, "iter": 6410, "memory": 16131, "step": 6410} +{"base_lr": 1.3068215656111759e-06, "lr": 1.3068215656111759e-06, "data_time": 0.00862431526184082, "loss": 0.003692626953125, "time": 1.0135180950164795, "tflops": 6.744815745256234, "tokens_per_sec": 111.49282933922473, "iter": 6420, "memory": 16131, "step": 6420} +{"base_lr": 1.3047090355654477e-06, "lr": 1.3047090355654477e-06, "data_time": 0.008276700973510742, "loss": 2.25, "time": 1.252765417098999, "tflops": 4.8281245729161055, "tokens_per_sec": 79.82340399488992, "iter": 6430, "memory": 16132, "step": 6430} +{"base_lr": 1.3025950054213595e-06, "lr": 1.3025950054213595e-06, "data_time": 0.008724689483642578, "loss": 1.5546875, "time": 0.9982080459594727, "tflops": 3.8155256038948195, "tokens_per_sec": 63.11309576690659, "iter": 6440, "memory": 16130, "step": 6440} +{"base_lr": 1.300479485586391e-06, "lr": 1.300479485586391e-06, "data_time": 0.009004592895507812, "loss": 0.00469970703125, "time": 1.0219662189483643, "tflops": 7.22267895400731, "tokens_per_sec": 119.37772280322777, "iter": 6450, "memory": 16131, "step": 6450} +{"base_lr": 1.2983624864753557e-06, "lr": 1.2983624864753557e-06, "data_time": 0.008596181869506836, "loss": 1.7265625, "time": 1.0112452507019043, "tflops": 4.424595245082172, "tokens_per_sec": 73.177105107355, "iter": 6460, "memory": 16131, "step": 6460} +{"base_lr": 1.29624401851035e-06, "lr": 1.29624401851035e-06, "data_time": 0.008503437042236328, "loss": 0.0059814453125, "time": 
1.0081255435943604, "tflops": 6.8409930506831165, "tokens_per_sec": 113.08115415212325, "iter": 6470, "memory": 16130, "step": 6470} +{"base_lr": 1.2941240921207017e-06, "lr": 1.2941240921207017e-06, "data_time": 0.008389949798583984, "loss": 1.1484375, "time": 1.0154609680175781, "tflops": 4.704256686648544, "tokens_per_sec": 77.79718028369818, "iter": 6480, "memory": 16131, "step": 6480} +{"base_lr": 1.2920027177429183e-06, "lr": 1.2920027177429183e-06, "data_time": 0.008329153060913086, "loss": 1.640625, "time": 1.0111398696899414, "tflops": 4.724360290819216, "tokens_per_sec": 78.12964592539174, "iter": 6490, "memory": 16131, "step": 6490} +{"base_lr": 1.2898799058206351e-06, "lr": 1.2898799058206351e-06, "data_time": 0.00912165641784668, "loss": 1.8125, "time": 1.2249231338500977, "tflops": 5.284008562923264, "tokens_per_sec": 87.35241995438301, "iter": 6500, "memory": 16130, "step": 6500} +{"base_lr": 1.287755666804565e-06, "lr": 1.287755666804565e-06, "data_time": 0.008774518966674805, "loss": 2.0625, "time": 1.0250837802886963, "tflops": 6.4323139807446585, "tokens_per_sec": 106.33277210687675, "iter": 6510, "memory": 16131, "step": 6510} +{"base_lr": 1.2856300111524479e-06, "lr": 1.2856300111524479e-06, "data_time": 0.008492469787597656, "loss": 2.0, "time": 1.0141034126281738, "tflops": 4.352443809584141, "tokens_per_sec": 71.9847690983896, "iter": 6520, "memory": 16131, "step": 6520} +{"base_lr": 1.2835029493289953e-06, "lr": 1.2835029493289953e-06, "data_time": 0.008346796035766602, "loss": 0.1328125, "time": 1.2549731731414795, "tflops": 4.867891827888839, "tokens_per_sec": 80.4798079843363, "iter": 6530, "memory": 16131, "step": 6530} +{"base_lr": 1.281374491805843e-06, "lr": 1.281374491805843e-06, "data_time": 0.008779764175415039, "loss": 2.25, "time": 1.0063529014587402, "tflops": 6.190880734123112, "tokens_per_sec": 102.34978192102979, "iter": 6540, "memory": 16131, "step": 6540} +{"base_lr": 1.2792446490614984e-06, "lr": 1.2792446490614984e-06, 
"data_time": 0.008798360824584961, "loss": 0.0076904296875, "time": 1.0262205600738525, "tflops": 8.019621804224727, "tokens_per_sec": 132.52511720295308, "iter": 6550, "memory": 16131, "step": 6550} +{"base_lr": 1.2771134315812867e-06, "lr": 1.2771134315812867e-06, "data_time": 0.008794307708740234, "loss": 0.0224609375, "time": 1.3350074291229248, "tflops": 4.666798591124295, "tokens_per_sec": 77.15312870400425, "iter": 6560, "memory": 16131, "step": 6560} +{"base_lr": 1.2749808498573025e-06, "lr": 1.2749808498573025e-06, "data_time": 0.008525371551513672, "loss": 0.0242919921875, "time": 1.0311541557312012, "tflops": 6.0419780638444465, "tokens_per_sec": 99.88807146576627, "iter": 6570, "memory": 16131, "step": 6570} +{"base_lr": 1.2728469143883555e-06, "lr": 1.2728469143883555e-06, "data_time": 0.008816719055175781, "loss": 1.7109375, "time": 1.2480733394622803, "tflops": 3.5365134251438435, "tokens_per_sec": 58.49015253494063, "iter": 6580, "memory": 16131, "step": 6580} +{"base_lr": 1.2707116356799205e-06, "lr": 1.2707116356799205e-06, "data_time": 0.00945591926574707, "loss": 2.296875, "time": 1.0191524028778076, "tflops": 6.64808065927476, "tokens_per_sec": 109.89524204979817, "iter": 6590, "memory": 16131, "step": 6590} +{"base_lr": 1.2685750242440857e-06, "lr": 1.2685750242440857e-06, "data_time": 0.00853872299194336, "loss": 0.11181640625, "time": 0.9946908950805664, "tflops": 6.202571521724925, "tokens_per_sec": 102.54441907969392, "iter": 6600, "memory": 16131, "step": 6600} +{"base_lr": 1.2664370905994998e-06, "lr": 1.2664370905994998e-06, "data_time": 0.008564949035644531, "loss": 1.9609375, "time": 1.015854835510254, "tflops": 4.047072499789715, "tokens_per_sec": 66.93869795459243, "iter": 6610, "memory": 16130, "step": 6610} +{"base_lr": 1.2642978452713195e-06, "lr": 1.2642978452713195e-06, "data_time": 0.008969306945800781, "loss": 0.001434326171875, "time": 1.0077581405639648, "tflops": 6.783366494607164, "tokens_per_sec": 112.13007908489872, 
"iter": 6620, "memory": 16130, "step": 6620} +{"base_lr": 1.262157298791162e-06, "lr": 1.262157298791162e-06, "data_time": 0.008797168731689453, "loss": 1.7578125, "time": 1.210519790649414, "tflops": 4.696448831668719, "tokens_per_sec": 77.65259248631834, "iter": 6630, "memory": 16131, "step": 6630} +{"base_lr": 1.260015461697048e-06, "lr": 1.260015461697048e-06, "data_time": 0.008932113647460938, "loss": 0.02392578125, "time": 1.0112578868865967, "tflops": 5.861393092140936, "tokens_per_sec": 96.90900933452288, "iter": 6640, "memory": 16130, "step": 6640} +{"base_lr": 1.2578723445333524e-06, "lr": 1.2578723445333524e-06, "data_time": 0.008601188659667969, "loss": 1.5078125, "time": 1.0287413597106934, "tflops": 4.114026613711828, "tokens_per_sec": 68.04431389792437, "iter": 6650, "memory": 16131, "step": 6650} +{"base_lr": 1.255727957850752e-06, "lr": 1.255727957850752e-06, "data_time": 0.008272409439086914, "loss": 0.00189971923828125, "time": 1.0014965534210205, "tflops": 7.3703038478671425, "tokens_per_sec": 121.81769331420797, "iter": 6660, "memory": 16131, "step": 6660} +{"base_lr": 1.2535823122061737e-06, "lr": 1.2535823122061737e-06, "data_time": 0.009047508239746094, "loss": 0.00107574462890625, "time": 1.0018844604492188, "tflops": 7.911895399473424, "tokens_per_sec": 130.75360001205354, "iter": 6670, "memory": 16131, "step": 6670} +{"base_lr": 1.2514354181627408e-06, "lr": 1.2514354181627408e-06, "data_time": 0.009181499481201172, "loss": 1.3046875, "time": 1.3078892230987549, "tflops": 3.3284981954316213, "tokens_per_sec": 55.05053388952685, "iter": 6680, "memory": 16131, "step": 6680} +{"base_lr": 1.2492872862897252e-06, "lr": 1.2492872862897252e-06, "data_time": 0.008400201797485352, "loss": 0.00347900390625, "time": 1.020009994506836, "tflops": 6.820686575249584, "tokens_per_sec": 112.74399331301507, "iter": 6690, "memory": 16131, "step": 6690} +{"base_lr": 1.2471379271624905e-06, "lr": 1.2471379271624905e-06, "data_time": 0.009161949157714844, 
"loss": 1.40625, "time": 1.010868787765503, "tflops": 4.965163142553522, "tokens_per_sec": 82.10759002994548, "iter": 6700, "memory": 16131, "step": 6700} +{"base_lr": 1.2449873513624445e-06, "lr": 1.2449873513624445e-06, "data_time": 0.008266925811767578, "loss": 1.40625, "time": 1.0034635066986084, "tflops": 4.036743706200971, "tokens_per_sec": 66.76874600090142, "iter": 6710, "memory": 16130, "step": 6710} +{"base_lr": 1.2428355694769818e-06, "lr": 1.2428355694769818e-06, "data_time": 0.008746623992919922, "loss": 0.005340576171875, "time": 1.2854185104370117, "tflops": 6.355345248806386, "tokens_per_sec": 105.02416053896577, "iter": 6720, "memory": 16133, "step": 6720} +{"base_lr": 1.2406825920994362e-06, "lr": 1.2406825920994362e-06, "data_time": 0.008968591690063477, "loss": 1.9453125, "time": 1.2474446296691895, "tflops": 5.820006418721365, "tokens_per_sec": 96.19665446130996, "iter": 6730, "memory": 16131, "step": 6730} +{"base_lr": 1.238528429829027e-06, "lr": 1.238528429829027e-06, "data_time": 0.008239030838012695, "loss": 2.171875, "time": 1.0114555358886719, "tflops": 5.560886402810219, "tokens_per_sec": 91.94670126373632, "iter": 6740, "memory": 16131, "step": 6740} +{"base_lr": 1.2363730932708064e-06, "lr": 1.2363730932708064e-06, "data_time": 0.009282588958740234, "loss": 1.765625, "time": 1.0316145420074463, "tflops": 4.2198968138201725, "tokens_per_sec": 69.79351014170804, "iter": 6750, "memory": 16130, "step": 6750} +{"base_lr": 1.2342165930356085e-06, "lr": 1.2342165930356085e-06, "data_time": 0.008162736892700195, "loss": 1.5, "time": 1.0198240280151367, "tflops": 4.446723372091645, "tokens_per_sec": 73.54209936188448, "iter": 6760, "memory": 16130, "step": 6760} +{"base_lr": 1.2320589397399943e-06, "lr": 1.2320589397399943e-06, "data_time": 0.008396625518798828, "loss": 1.78125, "time": 0.9877581596374512, "tflops": 4.28472221786468, "tokens_per_sec": 70.86754922441955, "iter": 6770, "memory": 16131, "step": 6770} +{"base_lr": 
1.229900144006204e-06, "lr": 1.229900144006204e-06, "data_time": 0.00878143310546875, "loss": 1.9453125, "time": 1.009807825088501, "tflops": 4.370958523900124, "tokens_per_sec": 72.29098268626497, "iter": 6780, "memory": 16132, "step": 6780} +{"base_lr": 1.2277402164621008e-06, "lr": 1.2277402164621008e-06, "data_time": 0.008890390396118164, "loss": 1.9375, "time": 1.0321059226989746, "tflops": 6.74075046285961, "tokens_per_sec": 111.42267229623255, "iter": 6790, "memory": 16131, "step": 6790} +{"base_lr": 1.2255791677411194e-06, "lr": 1.2255791677411194e-06, "data_time": 0.008545875549316406, "loss": 1.6953125, "time": 1.0121588706970215, "tflops": 5.138274242593333, "tokens_per_sec": 84.96689846791668, "iter": 6800, "memory": 16130, "step": 6800} +{"base_lr": 1.2234170084822145e-06, "lr": 1.2234170084822145e-06, "data_time": 0.008949518203735352, "loss": 1.9765625, "time": 1.0085861682891846, "tflops": 4.256242508418878, "tokens_per_sec": 70.39557177386581, "iter": 6810, "memory": 16131, "step": 6810} +{"base_lr": 1.221253749329808e-06, "lr": 1.221253749329808e-06, "data_time": 0.008491754531860352, "loss": 2.328125, "time": 1.015773057937622, "tflops": 6.670197959553877, "tokens_per_sec": 110.26084923662897, "iter": 6820, "memory": 16130, "step": 6820} +{"base_lr": 1.2190894009337371e-06, "lr": 1.2190894009337371e-06, "data_time": 0.0074138641357421875, "loss": 1.6171875, "time": 0.9840269088745117, "tflops": 5.592818919849518, "tokens_per_sec": 92.47714587804258, "iter": 6830, "memory": 16131, "step": 6830} +{"base_lr": 1.2169239739492003e-06, "lr": 1.2169239739492003e-06, "data_time": 0.008954763412475586, "loss": 0.11181640625, "time": 1.0163459777832031, "tflops": 6.189606593757483, "tokens_per_sec": 102.32735925883884, "iter": 6840, "memory": 16131, "step": 6840} +{"base_lr": 1.2147574790367079e-06, "lr": 1.2147574790367079e-06, "data_time": 0.008650541305541992, "loss": 2.4375, "time": 1.0103769302368164, "tflops": 5.686690392592012, "tokens_per_sec": 
94.0243162298247, "iter": 6850, "memory": 16131, "step": 6850} +{"base_lr": 1.2125899268620254e-06, "lr": 1.2125899268620254e-06, "data_time": 0.0076351165771484375, "loss": 0.001922607421875, "time": 1.0024707317352295, "tflops": 8.572488317726934, "tokens_per_sec": 141.65002079817637, "iter": 6860, "memory": 16131, "step": 6860} +{"base_lr": 1.2104213280961249e-06, "lr": 1.2104213280961249e-06, "data_time": 0.00854182243347168, "loss": 1.578125, "time": 1.2346653938293457, "tflops": 5.389509882346596, "tokens_per_sec": 89.09296441746305, "iter": 6870, "memory": 16130, "step": 6870} +{"base_lr": 1.2082516934151308e-06, "lr": 1.2082516934151308e-06, "data_time": 0.008342742919921875, "loss": 0.00128936767578125, "time": 1.037912368774414, "tflops": 7.637258493019481, "tokens_per_sec": 126.21489437934049, "iter": 6880, "memory": 16131, "step": 6880} +{"base_lr": 1.2060810335002664e-06, "lr": 1.2060810335002664e-06, "data_time": 0.008574724197387695, "loss": 1.765625, "time": 1.0378928184509277, "tflops": 4.777556122209229, "tokens_per_sec": 79.00623122367043, "iter": 6890, "memory": 16131, "step": 6890} +{"base_lr": 1.2039093590378037e-06, "lr": 1.2039093590378037e-06, "data_time": 0.009007453918457031, "loss": 0.0113525390625, "time": 1.015631914138794, "tflops": 7.745119679290895, "tokens_per_sec": 127.9991286115754, "iter": 6900, "memory": 16131, "step": 6900} +{"base_lr": 1.2017366807190075e-06, "lr": 1.2017366807190075e-06, "data_time": 0.008626937866210938, "loss": 0.03271484375, "time": 1.0266172885894775, "tflops": 7.426087163043237, "tokens_per_sec": 122.73317564425119, "iter": 6910, "memory": 16131, "step": 6910} +{"base_lr": 1.1995630092400859e-06, "lr": 1.1995630092400859e-06, "data_time": 0.008728504180908203, "loss": 0.09228515625, "time": 1.0216865539550781, "tflops": 6.512996883118483, "tokens_per_sec": 107.66511468127713, "iter": 6920, "memory": 16130, "step": 6920} +{"base_lr": 1.1973883553021364e-06, "lr": 1.1973883553021364e-06, "data_time": 
0.00915384292602539, "loss": 0.0157470703125, "time": 1.0247185230255127, "tflops": 5.252550424535207, "tokens_per_sec": 86.8531191737785, "iter": 6930, "memory": 16132, "step": 6930} +{"base_lr": 1.1952127296110933e-06, "lr": 1.1952127296110933e-06, "data_time": 0.008169412612915039, "loss": 1.8984375, "time": 1.0118365287780762, "tflops": 6.157329382932666, "tokens_per_sec": 101.79509937665925, "iter": 6940, "memory": 16130, "step": 6940} +{"base_lr": 1.193036142877673e-06, "lr": 1.193036142877673e-06, "data_time": 0.008893966674804688, "loss": 0.006744384765625, "time": 1.0236499309539795, "tflops": 7.210799002766341, "tokens_per_sec": 119.18136885544868, "iter": 6950, "memory": 16132, "step": 6950} +{"base_lr": 1.1908586058173264e-06, "lr": 1.1908586058173264e-06, "data_time": 0.00897526741027832, "loss": 1.2890625, "time": 1.2292370796203613, "tflops": 3.3445445445254336, "tokens_per_sec": 55.31886495072688, "iter": 6960, "memory": 16133, "step": 6960} +{"base_lr": 1.1886801291501806e-06, "lr": 1.1886801291501806e-06, "data_time": 0.008398294448852539, "loss": 1.6328125, "time": 1.0134379863739014, "tflops": 5.550008399325279, "tokens_per_sec": 91.76683847490642, "iter": 6970, "memory": 16131, "step": 6970} +{"base_lr": 1.1865007236009887e-06, "lr": 1.1865007236009887e-06, "data_time": 0.008649587631225586, "loss": 2.453125, "time": 1.028923749923706, "tflops": 6.231684841086823, "tokens_per_sec": 103.02026754437031, "iter": 6980, "memory": 16131, "step": 6980} +{"base_lr": 1.1843203998990779e-06, "lr": 1.1843203998990779e-06, "data_time": 0.009035348892211914, "loss": 1.90625, "time": 1.2918763160705566, "tflops": 5.197729437086505, "tokens_per_sec": 85.92153801343606, "iter": 6990, "memory": 16131, "step": 6990} +{"base_lr": 1.1821391687782938e-06, "lr": 1.1821391687782938e-06, "data_time": 0.008224248886108398, "loss": 1.53125, "time": 1.0343372821807861, "tflops": 4.150278054200058, "tokens_per_sec": 68.64298640597744, "iter": 7000, "memory": 16130, 
"step": 7000} +{"base_lr": 1.1799570409769508e-06, "lr": 1.1799570409769508e-06, "data_time": 0.008670806884765625, "loss": 0.033447265625, "time": 0.9976944923400879, "tflops": 5.516201959465654, "tokens_per_sec": 91.21028601297448, "iter": 7010, "memory": 16131, "step": 7010} +{"base_lr": 1.1777740272377766e-06, "lr": 1.1777740272377766e-06, "data_time": 0.008645057678222656, "loss": 1.8359375, "time": 1.198145866394043, "tflops": 7.830457781362184, "tokens_per_sec": 129.36655239345845, "iter": 7020, "memory": 16131, "step": 7020} +{"base_lr": 1.175590138307861e-06, "lr": 1.175590138307861e-06, "data_time": 0.009027957916259766, "loss": 0.003509521484375, "time": 1.025482416152954, "tflops": 7.197913669739464, "tokens_per_sec": 118.96839777863565, "iter": 7030, "memory": 16131, "step": 7030} +{"base_lr": 1.1734053849386026e-06, "lr": 1.1734053849386026e-06, "data_time": 0.008687019348144531, "loss": 0.00390625, "time": 1.267059326171875, "tflops": 6.734520215540222, "tokens_per_sec": 111.28129290195702, "iter": 7040, "memory": 16131, "step": 7040} +{"base_lr": 1.1712197778856549e-06, "lr": 1.1712197778856549e-06, "data_time": 0.008668661117553711, "loss": 1.875, "time": 1.0657057762145996, "tflops": 5.391451290133202, "tokens_per_sec": 89.14280293886748, "iter": 7050, "memory": 16130, "step": 7050} +{"base_lr": 1.1690333279088744e-06, "lr": 1.1690333279088744e-06, "data_time": 0.008703470230102539, "loss": 1.5, "time": 1.0214269161224365, "tflops": 5.625170720864035, "tokens_per_sec": 93.00714373236617, "iter": 7060, "memory": 16130, "step": 7060} +{"base_lr": 1.1668460457722682e-06, "lr": 1.1668460457722682e-06, "data_time": 0.008987188339233398, "loss": 0.01123046875, "time": 1.2627887725830078, "tflops": 5.653316041104631, "tokens_per_sec": 93.44397302372275, "iter": 7070, "memory": 16131, "step": 7070} +{"base_lr": 1.1646579422439397e-06, "lr": 1.1646579422439397e-06, "data_time": 0.008894681930541992, "loss": 0.004669189453125, "time": 1.2538714408874512, 
"tflops": 5.838504478970167, "tokens_per_sec": 96.50112129060334, "iter": 7080, "memory": 16131, "step": 7080} +{"base_lr": 1.1624690280960355e-06, "lr": 1.1624690280960355e-06, "data_time": 0.008741140365600586, "loss": 1.375, "time": 1.016202449798584, "tflops": 4.40301135738694, "tokens_per_sec": 72.82013541159472, "iter": 7090, "memory": 16130, "step": 7090} +{"base_lr": 1.160279314104693e-06, "lr": 1.160279314104693e-06, "data_time": 0.008597373962402344, "loss": 2.421875, "time": 1.0332677364349365, "tflops": 6.322738528070799, "tokens_per_sec": 104.5227642281039, "iter": 7100, "memory": 16131, "step": 7100} +{"base_lr": 1.1580888110499876e-06, "lr": 1.1580888110499876e-06, "data_time": 0.008908748626708984, "loss": 2.09375, "time": 1.293395757675171, "tflops": 3.833777217501504, "tokens_per_sec": 63.39900182394941, "iter": 7110, "memory": 16131, "step": 7110} +{"base_lr": 1.15589752971588e-06, "lr": 1.15589752971588e-06, "data_time": 0.008710384368896484, "loss": 1.515625, "time": 1.0113894939422607, "tflops": 5.0823226962731, "tokens_per_sec": 84.04279509429877, "iter": 7120, "memory": 16130, "step": 7120} +{"base_lr": 1.1537054808901603e-06, "lr": 1.1537054808901603e-06, "data_time": 0.007485389709472656, "loss": 0.0341796875, "time": 0.9707939624786377, "tflops": 6.604829431223919, "tokens_per_sec": 109.18897736987455, "iter": 7130, "memory": 16131, "step": 7130} +{"base_lr": 1.1515126753643995e-06, "lr": 1.1515126753643995e-06, "data_time": 0.008643627166748047, "loss": 1.5234375, "time": 1.2587590217590332, "tflops": 3.65073956167923, "tokens_per_sec": 60.37692575480778, "iter": 7140, "memory": 16130, "step": 7140} +{"base_lr": 1.149319123933893e-06, "lr": 1.149319123933893e-06, "data_time": 0.00826573371887207, "loss": 0.05615234375, "time": 1.0186567306518555, "tflops": 6.770268757299618, "tokens_per_sec": 111.91208634820248, "iter": 7150, "memory": 16130, "step": 7150} +{"base_lr": 1.1471248373976072e-06, "lr": 1.1471248373976072e-06, "data_time": 
0.007601499557495117, "loss": 0.01275634765625, "time": 0.9586939811706543, "tflops": 6.625007011891096, "tokens_per_sec": 109.52400042365525, "iter": 7160, "memory": 16131, "step": 7160} +{"base_lr": 1.1449298265581282e-06, "lr": 1.1449298265581282e-06, "data_time": 0.008692264556884766, "loss": 0.007659912109375, "time": 1.035008430480957, "tflops": 6.604770168096876, "tokens_per_sec": 109.17785466479819, "iter": 7170, "memory": 16131, "step": 7170} +{"base_lr": 1.142734102221609e-06, "lr": 1.142734102221609e-06, "data_time": 0.00858449935913086, "loss": 1.625, "time": 1.0312988758087158, "tflops": 6.687275628548714, "tokens_per_sec": 110.54021552238562, "iter": 7180, "memory": 16131, "step": 7180} +{"base_lr": 1.1405376751977135e-06, "lr": 1.1405376751977135e-06, "data_time": 0.008353710174560547, "loss": 1.4453125, "time": 1.278451919555664, "tflops": 2.9318139811873243, "tokens_per_sec": 48.49615308294119, "iter": 7190, "memory": 16131, "step": 7190} +{"base_lr": 1.138340556299566e-06, "lr": 1.138340556299566e-06, "data_time": 0.008736371994018555, "loss": 1.2265625, "time": 1.1909384727478027, "tflops": 6.350574224496689, "tokens_per_sec": 104.95924253037836, "iter": 7200, "memory": 16131, "step": 7200} +{"base_lr": 1.1361427563436964e-06, "lr": 1.1361427563436964e-06, "data_time": 0.008352994918823242, "loss": 0.01416015625, "time": 1.0245954990386963, "tflops": 6.790161075937398, "tokens_per_sec": 112.23941556232086, "iter": 7210, "memory": 16131, "step": 7210} +{"base_lr": 1.1339442861499875e-06, "lr": 1.1339442861499875e-06, "data_time": 0.009007692337036133, "loss": 0.03173828125, "time": 1.0156433582305908, "tflops": 7.983764779706297, "tokens_per_sec": 131.9360766886882, "iter": 7220, "memory": 16131, "step": 7220} +{"base_lr": 1.1317451565416225e-06, "lr": 1.1317451565416225e-06, "data_time": 0.008941411972045898, "loss": 1.7109375, "time": 1.0287396907806396, "tflops": 5.173163696379255, "tokens_per_sec": 85.54156196028299, "iter": 7230, "memory": 
16131, "step": 7230} +{"base_lr": 1.1295453783450298e-06, "lr": 1.1295453783450298e-06, "data_time": 0.008691787719726562, "loss": 2.0, "time": 1.018434762954712, "tflops": 4.155660712323942, "tokens_per_sec": 68.73292482362372, "iter": 7240, "memory": 16130, "step": 7240} +{"base_lr": 1.1273449623898317e-06, "lr": 1.1273449623898317e-06, "data_time": 0.009306669235229492, "loss": 0.0133056640625, "time": 1.0010392665863037, "tflops": 7.192064612866118, "tokens_per_sec": 118.8764556716034, "iter": 7250, "memory": 16131, "step": 7250} +{"base_lr": 1.12514391950879e-06, "lr": 1.12514391950879e-06, "data_time": 0.009049415588378906, "loss": 2.375, "time": 1.0139679908752441, "tflops": 6.861329488466199, "tokens_per_sec": 113.4158090144641, "iter": 7260, "memory": 16130, "step": 7260} +{"base_lr": 1.1229422605377525e-06, "lr": 1.1229422605377525e-06, "data_time": 0.008225202560424805, "loss": 1.625, "time": 1.0032596588134766, "tflops": 8.20316127257129, "tokens_per_sec": 135.5581267572418, "iter": 7270, "memory": 16131, "step": 7270} +{"base_lr": 1.1207399963156e-06, "lr": 1.1207399963156e-06, "data_time": 0.008573055267333984, "loss": 0.025390625, "time": 0.9986610412597656, "tflops": 7.694660206480907, "tokens_per_sec": 127.17027575208911, "iter": 7280, "memory": 16131, "step": 7280} +{"base_lr": 1.1185371376841937e-06, "lr": 1.1185371376841937e-06, "data_time": 0.008667945861816406, "loss": 0.031494140625, "time": 1.06719970703125, "tflops": 5.440648901063218, "tokens_per_sec": 89.95504718321567, "iter": 7290, "memory": 16131, "step": 7290} +{"base_lr": 1.1163336954883208e-06, "lr": 1.1163336954883208e-06, "data_time": 0.008593082427978516, "loss": 0.1728515625, "time": 1.0141985416412354, "tflops": 6.142989299992254, "tokens_per_sec": 101.55802416479304, "iter": 7300, "memory": 16130, "step": 7300} +{"base_lr": 1.1141296805756414e-06, "lr": 1.1141296805756414e-06, "data_time": 0.008672714233398438, "loss": 2.0, "time": 1.0428204536437988, "tflops": 
4.9871958653502375, "tokens_per_sec": 82.4686547903988, "iter": 7310, "memory": 16130, "step": 7310} +{"base_lr": 1.1119251037966345e-06, "lr": 1.1119251037966345e-06, "data_time": 0.00830388069152832, "loss": 1.953125, "time": 1.371347188949585, "tflops": 3.792438484305234, "tokens_per_sec": 62.712054753844626, "iter": 7320, "memory": 16130, "step": 7320} +{"base_lr": 1.1097199760045462e-06, "lr": 1.1097199760045462e-06, "data_time": 0.008495330810546875, "loss": 2.21875, "time": 1.2626380920410156, "tflops": 3.0643738160112854, "tokens_per_sec": 50.68752511378401, "iter": 7330, "memory": 16131, "step": 7330} +{"base_lr": 1.1075143080553346e-06, "lr": 1.1075143080553346e-06, "data_time": 0.008533716201782227, "loss": 0.035400390625, "time": 1.0007781982421875, "tflops": 7.860074029319059, "tokens_per_sec": 129.89891289419376, "iter": 7340, "memory": 16131, "step": 7340} +{"base_lr": 1.105308110807617e-06, "lr": 1.105308110807617e-06, "data_time": 0.008748769760131836, "loss": 0.04833984375, "time": 1.0189363956451416, "tflops": 5.460631346614779, "tokens_per_sec": 90.29022850995496, "iter": 7350, "memory": 16131, "step": 7350} +{"base_lr": 1.103101395122617e-06, "lr": 1.103101395122617e-06, "data_time": 0.008434772491455078, "loss": 0.00531005859375, "time": 1.3048782348632812, "tflops": 5.610281348927105, "tokens_per_sec": 92.72895873888574, "iter": 7360, "memory": 16130, "step": 7360} +{"base_lr": 1.1008941718641105e-06, "lr": 1.1008941718641105e-06, "data_time": 0.008824586868286133, "loss": 0.0283203125, "time": 1.007415533065796, "tflops": 7.56763144692798, "tokens_per_sec": 125.0725205878334, "iter": 7370, "memory": 16130, "step": 7370} +{"base_lr": 1.0986864518983714e-06, "lr": 1.0986864518983714e-06, "data_time": 0.008580923080444336, "loss": 0.00927734375, "time": 1.2364389896392822, "tflops": 5.185802606326273, "tokens_per_sec": 85.73006908398983, "iter": 7380, "memory": 16131, "step": 7380} +{"base_lr": 1.096478246094119e-06, "lr": 
1.096478246094119e-06, "data_time": 0.008475542068481445, "loss": 1.4140625, "time": 1.0540924072265625, "tflops": 4.30216109110156, "tokens_per_sec": 71.15125721971796, "iter": 7390, "memory": 16130, "step": 7390} +{"base_lr": 1.0942695653224662e-06, "lr": 1.0942695653224662e-06, "data_time": 0.008368968963623047, "loss": 0.0093994140625, "time": 1.001171350479126, "tflops": 6.94902872802302, "tokens_per_sec": 114.86545229730166, "iter": 7400, "memory": 16131, "step": 7400} +{"base_lr": 1.0920604204568615e-06, "lr": 1.0920604204568615e-06, "data_time": 0.008386850357055664, "loss": 0.08935546875, "time": 1.0105645656585693, "tflops": 6.64462596977899, "tokens_per_sec": 109.83959241392277, "iter": 7410, "memory": 16131, "step": 7410} +{"base_lr": 1.0898508223730397e-06, "lr": 1.0898508223730397e-06, "data_time": 0.008121013641357422, "loss": 0.020263671875, "time": 1.0328259468078613, "tflops": 6.560066969043606, "tokens_per_sec": 108.44034306656233, "iter": 7420, "memory": 16131, "step": 7420} +{"base_lr": 1.0876407819489668e-06, "lr": 1.0876407819489668e-06, "data_time": 0.008166790008544922, "loss": 0.0038909912109375, "time": 1.0209472179412842, "tflops": 5.390566933217423, "tokens_per_sec": 89.13291343641663, "iter": 7430, "memory": 16131, "step": 7430} +{"base_lr": 1.0854303100647869e-06, "lr": 1.0854303100647869e-06, "data_time": 0.008682727813720703, "loss": 0.1103515625, "time": 1.058741569519043, "tflops": 5.770127318727471, "tokens_per_sec": 95.39627318665319, "iter": 7440, "memory": 16130, "step": 7440} +{"base_lr": 1.0832194176027665e-06, "lr": 1.0832194176027665e-06, "data_time": 0.008159399032592773, "loss": 2.1875, "time": 1.0463128089904785, "tflops": 5.722899394153138, "tokens_per_sec": 94.61797576140184, "iter": 7450, "memory": 16131, "step": 7450} diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/config.py b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/config.py new file mode 100644 index 
0000000000000000000000000000000000000000..348fa4a2ed2b288a9b954eaaa7658bcf2c9460ca --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/config.py @@ -0,0 +1,261 @@ +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = None +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + 
llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-06, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] +per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' 
+save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2a_easy.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), 
+ num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a' diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/eval_outputs_iter_4095.txt b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/eval_outputs_iter_4095.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e3ec6b9deb2341b4980e1ea9fd5f0613733cf1f --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/eval_outputs_iter_4095.txt @@ -0,0 +1,24 @@ +Eval output 1: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The luminal surface of the tumor cells is smooth and regular, with no evidence of necrosis or hemorrhage. The tumor cells are well-differentiated and show a cohesive growth pattern.<|im_end|> + + +Eval output 2: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates that the tissue sample is composed of a mixture of benign and malignant cells. The malignant cells are characterized by their irregular nuclear morphology, hyperchromatic nuclei, and pleomorphic features. The benign cells exhibit a more uniform nuclear appearance. 
The report also notes the presence of necrosis, which is a significant finding as it suggests the potential for aggressive behavior. Additionally, the report mentions the presence of vascular invasion, indicating that the cancer has spread into the blood vessels, which is a concerning feature. The report concludes by stating that the overall assessment is of invasive ductal carcinoma, which is a type of breast cancer.<|im_end|> + + +Eval output 3: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + + diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/eval_outputs_iter_7451.txt b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/eval_outputs_iter_7451.txt new file mode 100644 index 0000000000000000000000000000000000000000..572e5582a6eb67433f7c4ac1d5590ba31fa97faa --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/eval_outputs_iter_7451.txt @@ -0,0 +1,24 @@ +Eval output 1: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and separated by fibrous stroma.<|im_end|> + + +Eval output 2: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image reveals a well-demarcated area of neoplastic tissue, characterized by the presence of malignant cells. 
These cells exhibit marked pleomorphism, with significant variation in cell size and shape. The neoplastic cells are arranged in irregular clusters and cords, with areas of necrosis noted within the tumor. The tumor cells show marked atypia, with irregular nuclear contours, prominent nucleoli, and frequent mitotic figures. The tumor displays a high degree of cellular atypia, indicative of a poorly differentiated adenocarcinoma. The tumor cells infiltrate the surrounding stroma, with evidence of vascular invasion. The tumor also shows invasion into the adjacent lung parenchyma. The tumor cells express positivity for cytokeratin, indicating their epithelial origin. The absence of estrogen and progesterone receptors suggests that the tumor is hormone receptor negative. The Ki-67 proliferation index is elevated, indicating a high proliferative activity. The tumor cells show no evidence of neuroendocrine differentiation.<|im_end|> + + +Eval output 3: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + + diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/scalars.json b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/scalars.json new file mode 100644 index 0000000000000000000000000000000000000000..1125c760b5dce9c192552c9e18265acfa31e6221 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/20250925_202658/vis_data/scalars.json @@ -0,0 +1,745 @@ +{"base_lr": 4.395161290322582e-08, "lr": 4.395161290322582e-08, "data_time": 0.008252859115600586, "loss": 2.15625, "time": 1.0588200092315674, "tflops": 3.5971065187335634, "tokens_per_sec": 59.500197815171994, "iter": 10, "memory": 15740, "step": 10} +{"base_lr": 7.056451612903229e-08, "lr": 7.056451612903229e-08, "data_time": 0.00792551040649414, "loss": 0.326171875, "time": 1.0317912101745605, "tflops": 7.565075109627917, "tokens_per_sec": 125.02529458266123, "iter": 20, "memory": 15740, "step": 20} +{"base_lr": 9.717741935483876e-08, "lr": 9.717741935483876e-08, "data_time": 0.0076558589935302734, "loss": 1.6875, "time": 1.0424790382385254, "tflops": 4.001761948860862, "tokens_per_sec": 66.18838122301524, "iter": 30, "memory": 15740, "step": 30} +{"base_lr": 1.237903225806452e-07, "lr": 1.237903225806452e-07, "data_time": 0.00793910026550293, "loss": 1.7734375, "time": 1.0455522537231445, "tflops": 4.2215280057542985, "tokens_per_sec": 69.8195616144309, "iter": 40, "memory": 15739, "step": 40} +{"base_lr": 1.504032258064516e-07, "lr": 1.504032258064516e-07, "data_time": 0.00835728645324707, "loss": 0.5546875, "time": 1.061682939529419, "tflops": 6.438827027209791, "tokens_per_sec": 106.4347893260673, "iter": 50, "memory": 15742, "step": 50} +{"base_lr": 
1.7701612903225805e-07, "lr": 1.7701612903225805e-07, "data_time": 0.008299589157104492, "loss": 0.59375, "time": 1.0494346618652344, "tflops": 6.052167493901179, "tokens_per_sec": 100.05387073195773, "iter": 60, "memory": 15740, "step": 60} +{"base_lr": 2.036290322580645e-07, "lr": 2.036290322580645e-07, "data_time": 0.008434057235717773, "loss": 0.3125, "time": 1.0530741214752197, "tflops": 5.973731228731991, "tokens_per_sec": 98.75848041371559, "iter": 70, "memory": 16130, "step": 70} +{"base_lr": 2.3024193548387097e-07, "lr": 2.3024193548387097e-07, "data_time": 0.00812673568725586, "loss": 0.39453125, "time": 1.047574520111084, "tflops": 6.756894553634868, "tokens_per_sec": 111.68656525502522, "iter": 80, "memory": 16130, "step": 80} +{"base_lr": 2.568548387096774e-07, "lr": 2.568548387096774e-07, "data_time": 0.008737802505493164, "loss": 2.53125, "time": 1.1176350116729736, "tflops": 5.0867628493464006, "tokens_per_sec": 84.10616974069958, "iter": 90, "memory": 16131, "step": 90} +{"base_lr": 2.8346774193548383e-07, "lr": 2.8346774193548383e-07, "data_time": 0.008491754531860352, "loss": 0.28125, "time": 1.0414159297943115, "tflops": 6.505957115293689, "tokens_per_sec": 107.54588709048592, "iter": 100, "memory": 16130, "step": 100} +{"base_lr": 3.100806451612901e-07, "lr": 3.100806451612901e-07, "data_time": 0.008758068084716797, "loss": 0.9375, "time": 1.0598418712615967, "tflops": 5.64984560719282, "tokens_per_sec": 93.41016116117457, "iter": 110, "memory": 16131, "step": 110} +{"base_lr": 3.366935483870966e-07, "lr": 3.366935483870966e-07, "data_time": 0.008307933807373047, "loss": 1.265625, "time": 1.166724443435669, "tflops": 3.2644283564997063, "tokens_per_sec": 53.99732589336096, "iter": 120, "memory": 16131, "step": 120} +{"base_lr": 3.63306451612903e-07, "lr": 3.63306451612903e-07, "data_time": 0.008579492568969727, "loss": 1.75, "time": 1.090517282485962, "tflops": 2.383172245238646, "tokens_per_sec": 39.43082855315876, "iter": 130, "memory": 
16130, "step": 130} +{"base_lr": 3.8991935483870945e-07, "lr": 3.8991935483870945e-07, "data_time": 0.008089303970336914, "loss": 0.19921875, "time": 1.2434301376342773, "tflops": 6.033743470091637, "tokens_per_sec": 99.72413909463376, "iter": 140, "memory": 16131, "step": 140} +{"base_lr": 4.1653225806451577e-07, "lr": 4.1653225806451577e-07, "data_time": 0.008330821990966797, "loss": 1.8515625, "time": 1.0202295780181885, "tflops": 5.216314970703492, "tokens_per_sec": 86.2550958097638, "iter": 150, "memory": 16130, "step": 150} +{"base_lr": 4.4314516129032225e-07, "lr": 4.4314516129032225e-07, "data_time": 0.007828474044799805, "loss": 1.7890625, "time": 1.039480209350586, "tflops": 3.838662238398019, "tokens_per_sec": 63.49327231652628, "iter": 160, "memory": 16130, "step": 160} +{"base_lr": 4.697580645161287e-07, "lr": 4.697580645161287e-07, "data_time": 0.00848531723022461, "loss": 2.4375, "time": 1.0166044235229492, "tflops": 4.163142747116224, "tokens_per_sec": 68.85667461228682, "iter": 170, "memory": 16131, "step": 170} +{"base_lr": 4.963709677419353e-07, "lr": 4.963709677419353e-07, "data_time": 0.00827169418334961, "loss": 0.361328125, "time": 1.0777912139892578, "tflops": 6.736124452899487, "tokens_per_sec": 111.33881817029237, "iter": 180, "memory": 16131, "step": 180} +{"base_lr": 5.229838709677418e-07, "lr": 5.229838709677418e-07, "data_time": 0.008142948150634766, "loss": 0.75, "time": 1.0406582355499268, "tflops": 6.685353787141135, "tokens_per_sec": 110.50698113114795, "iter": 190, "memory": 16131, "step": 190} +{"base_lr": 5.49596774193548e-07, "lr": 5.49596774193548e-07, "data_time": 0.008439064025878906, "loss": 0.84375, "time": 1.0235364437103271, "tflops": 6.205303569348335, "tokens_per_sec": 102.58550210412795, "iter": 200, "memory": 16131, "step": 200} +{"base_lr": 5.762096774193546e-07, "lr": 5.762096774193546e-07, "data_time": 0.008361339569091797, "loss": 0.306640625, "time": 1.2279424667358398, "tflops": 5.172354991841126, 
"tokens_per_sec": 85.50889218696801, "iter": 210, "memory": 16130, "step": 210} +{"base_lr": 6.028225806451608e-07, "lr": 6.028225806451608e-07, "data_time": 0.008082389831542969, "loss": 0.4140625, "time": 1.0067017078399658, "tflops": 6.971040840936602, "tokens_per_sec": 115.2277771026938, "iter": 220, "memory": 16132, "step": 220} +{"base_lr": 6.294354838709672e-07, "lr": 6.294354838709672e-07, "data_time": 0.008601903915405273, "loss": 1.3046875, "time": 1.0299232006072998, "tflops": 4.050547599323962, "tokens_per_sec": 66.99528659927923, "iter": 230, "memory": 16131, "step": 230} +{"base_lr": 6.560483870967735e-07, "lr": 6.560483870967735e-07, "data_time": 0.008841991424560547, "loss": 2.09375, "time": 1.0509626865386963, "tflops": 6.677457544288305, "tokens_per_sec": 110.37499378967581, "iter": 240, "memory": 16131, "step": 240} +{"base_lr": 6.826612903225798e-07, "lr": 6.826612903225798e-07, "data_time": 0.008244752883911133, "loss": 1.40625, "time": 1.02738356590271, "tflops": 3.5893893913674124, "tokens_per_sec": 59.37412474214828, "iter": 250, "memory": 16130, "step": 250} +{"base_lr": 7.092741935483862e-07, "lr": 7.092741935483862e-07, "data_time": 0.007828235626220703, "loss": 1.984375, "time": 1.2792456150054932, "tflops": 4.870222509493908, "tokens_per_sec": 80.51620329335832, "iter": 260, "memory": 16131, "step": 260} +{"base_lr": 7.358870967741926e-07, "lr": 7.358870967741926e-07, "data_time": 0.008242130279541016, "loss": 0.294921875, "time": 1.1922528743743896, "tflops": 6.699435295198208, "tokens_per_sec": 110.71476767808471, "iter": 270, "memory": 16131, "step": 270} +{"base_lr": 7.624999999999988e-07, "lr": 7.624999999999988e-07, "data_time": 0.00824737548828125, "loss": 2.5, "time": 1.2521777153015137, "tflops": 4.588566552385814, "tokens_per_sec": 75.86782518090808, "iter": 280, "memory": 16131, "step": 280} +{"base_lr": 7.891129032258054e-07, "lr": 7.891129032258054e-07, "data_time": 0.007684469223022461, "loss": 0.75390625, "time": 
1.020216941833496, "tflops": 6.403620111648591, "tokens_per_sec": 105.85983781625949, "iter": 290, "memory": 16131, "step": 290} +{"base_lr": 8.157258064516119e-07, "lr": 8.157258064516119e-07, "data_time": 0.00827336311340332, "loss": 2.78125, "time": 1.0804007053375244, "tflops": 4.8137231205253945, "tokens_per_sec": 79.60009612642136, "iter": 300, "memory": 16130, "step": 300} +{"base_lr": 8.423387096774183e-07, "lr": 8.423387096774183e-07, "data_time": 0.007940530776977539, "loss": 0.52734375, "time": 1.2500593662261963, "tflops": 4.935478734400104, "tokens_per_sec": 81.59612475673548, "iter": 310, "memory": 16130, "step": 310} +{"base_lr": 8.689516129032248e-07, "lr": 8.689516129032248e-07, "data_time": 0.008423566818237305, "loss": 0.146484375, "time": 1.318004846572876, "tflops": 7.256421607126407, "tokens_per_sec": 119.8781631271823, "iter": 320, "memory": 16131, "step": 320} +{"base_lr": 8.955645161290312e-07, "lr": 8.955645161290312e-07, "data_time": 0.008231878280639648, "loss": 0.267578125, "time": 1.0330135822296143, "tflops": 6.910793959901945, "tokens_per_sec": 114.22889498238676, "iter": 330, "memory": 16130, "step": 330} +{"base_lr": 9.221774193548377e-07, "lr": 9.221774193548377e-07, "data_time": 0.008239507675170898, "loss": 0.5859375, "time": 1.040961742401123, "tflops": 6.217811918098017, "tokens_per_sec": 102.78956050112545, "iter": 340, "memory": 16132, "step": 340} +{"base_lr": 9.487903225806442e-07, "lr": 9.487903225806442e-07, "data_time": 0.00878286361694336, "loss": 0.203125, "time": 1.02880859375, "tflops": 6.350143035991382, "tokens_per_sec": 104.97579496904841, "iter": 350, "memory": 16131, "step": 350} +{"base_lr": 9.754032258064507e-07, "lr": 9.754032258064507e-07, "data_time": 0.008882761001586914, "loss": 0.140625, "time": 1.0657086372375488, "tflops": 6.9262213361040414, "tokens_per_sec": 114.47781854909697, "iter": 360, "memory": 16131, "step": 360} +{"base_lr": 1.0020161290322574e-06, "lr": 1.0020161290322574e-06, "data_time": 
0.00839543342590332, "loss": 1.859375, "time": 1.0951733589172363, "tflops": 7.95758713736447, "tokens_per_sec": 131.48603262431146, "iter": 370, "memory": 16131, "step": 370} +{"base_lr": 1.028629032258064e-06, "lr": 1.028629032258064e-06, "data_time": 0.00841975212097168, "loss": 2.09375, "time": 1.0826020240783691, "tflops": 6.706190816384188, "tokens_per_sec": 110.84405657013846, "iter": 380, "memory": 16131, "step": 380} +{"base_lr": 1.0552419354838708e-06, "lr": 1.0552419354838708e-06, "data_time": 0.008339166641235352, "loss": 0.306640625, "time": 1.0399360656738281, "tflops": 7.447541383565911, "tokens_per_sec": 123.08448973441374, "iter": 390, "memory": 16131, "step": 390} +{"base_lr": 1.0818548387096769e-06, "lr": 1.0818548387096769e-06, "data_time": 0.008753538131713867, "loss": 0.2158203125, "time": 1.0642521381378174, "tflops": 7.391284868712155, "tokens_per_sec": 122.1515046494022, "iter": 400, "memory": 16131, "step": 400} +{"base_lr": 1.1084677419354831e-06, "lr": 1.1084677419354831e-06, "data_time": 0.009238243103027344, "loss": 2.359375, "time": 1.0673494338989258, "tflops": 6.631708740041046, "tokens_per_sec": 109.61733457102284, "iter": 410, "memory": 16130, "step": 410} +{"base_lr": 1.1350806451612896e-06, "lr": 1.1350806451612896e-06, "data_time": 0.007996797561645508, "loss": 2.5625, "time": 1.0594053268432617, "tflops": 4.39485139971969, "tokens_per_sec": 72.68228509796742, "iter": 420, "memory": 16131, "step": 420} +{"base_lr": 1.1616935483870963e-06, "lr": 1.1616935483870963e-06, "data_time": 0.008247852325439453, "loss": 0.259765625, "time": 1.0520517826080322, "tflops": 6.325013132849636, "tokens_per_sec": 104.55759100298833, "iter": 430, "memory": 16131, "step": 430} +{"base_lr": 1.188306451612903e-06, "lr": 1.188306451612903e-06, "data_time": 0.008158683776855469, "loss": 0.265625, "time": 1.0134432315826416, "tflops": 6.685532220531496, "tokens_per_sec": 110.51433026493739, "iter": 440, "memory": 16131, "step": 440} +{"base_lr": 
1.2149193548387095e-06, "lr": 1.2149193548387095e-06, "data_time": 0.008710145950317383, "loss": 0.1796875, "time": 1.0389196872711182, "tflops": 7.979947673722839, "tokens_per_sec": 131.8677484683341, "iter": 450, "memory": 16131, "step": 450} +{"base_lr": 1.2415322580645158e-06, "lr": 1.2415322580645158e-06, "data_time": 0.008257865905761719, "loss": 1.359375, "time": 1.3634393215179443, "tflops": 3.459235874260699, "tokens_per_sec": 57.20826645450112, "iter": 460, "memory": 16131, "step": 460} +{"base_lr": 1.2681451612903222e-06, "lr": 1.2681451612903222e-06, "data_time": 0.008691549301147461, "loss": 2.609375, "time": 1.0664176940917969, "tflops": 4.309194591214387, "tokens_per_sec": 71.2666344725772, "iter": 470, "memory": 16130, "step": 470} +{"base_lr": 1.2947580645161285e-06, "lr": 1.2947580645161285e-06, "data_time": 0.008431434631347656, "loss": 0.30859375, "time": 1.041290521621704, "tflops": 6.506740662417137, "tokens_per_sec": 107.55883941540525, "iter": 480, "memory": 16131, "step": 480} +{"base_lr": 1.3213709677419352e-06, "lr": 1.3213709677419352e-06, "data_time": 0.007923126220703125, "loss": 0.314453125, "time": 1.4173884391784668, "tflops": 6.020252748314934, "tokens_per_sec": 99.47872869742439, "iter": 490, "memory": 16131, "step": 490} +{"base_lr": 1.347983870967742e-06, "lr": 1.347983870967742e-06, "data_time": 0.008606433868408203, "loss": 2.4375, "time": 1.1120550632476807, "tflops": 6.147171153023487, "tokens_per_sec": 101.61367340020881, "iter": 500, "memory": 16131, "step": 500} +{"base_lr": 1.3745967741935486e-06, "lr": 1.3745967741935486e-06, "data_time": 0.008076906204223633, "loss": 1.6484375, "time": 1.1908760070800781, "tflops": 5.079040520117054, "tokens_per_sec": 83.97179841174827, "iter": 510, "memory": 16131, "step": 510} +{"base_lr": 1.4012096774193555e-06, "lr": 1.4012096774193555e-06, "data_time": 0.00827336311340332, "loss": 1.90625, "time": 1.0396003723144531, "tflops": 8.149676055703795, "tokens_per_sec": 
134.6671314557002, "iter": 520, "memory": 16131, "step": 520} +{"base_lr": 1.4278225806451622e-06, "lr": 1.4278225806451622e-06, "data_time": 0.008142948150634766, "loss": 2.34375, "time": 1.044724702835083, "tflops": 4.398672058662919, "tokens_per_sec": 72.74643721325346, "iter": 530, "memory": 16131, "step": 530} +{"base_lr": 1.4544354838709687e-06, "lr": 1.4544354838709687e-06, "data_time": 0.008404731750488281, "loss": 2.546875, "time": 1.106694221496582, "tflops": 7.10782668990125, "tokens_per_sec": 117.46695471499217, "iter": 540, "memory": 16131, "step": 540} +{"base_lr": 1.4810483870967756e-06, "lr": 1.4810483870967756e-06, "data_time": 0.008424758911132812, "loss": 1.9609375, "time": 1.26153564453125, "tflops": 3.354855132967117, "tokens_per_sec": 55.487928782190274, "iter": 550, "memory": 16130, "step": 550} +{"base_lr": 1.5076612903225823e-06, "lr": 1.5076612903225823e-06, "data_time": 0.008009195327758789, "loss": 0.26171875, "time": 1.0370826721191406, "tflops": 7.584921565644231, "tokens_per_sec": 125.35162672637941, "iter": 560, "memory": 16130, "step": 560} +{"base_lr": 1.5342741935483888e-06, "lr": 1.5342741935483888e-06, "data_time": 0.008609294891357422, "loss": 2.359375, "time": 1.0406486988067627, "tflops": 7.500684920256563, "tokens_per_sec": 123.96114091892018, "iter": 570, "memory": 16131, "step": 570} +{"base_lr": 1.5608870967741953e-06, "lr": 1.5608870967741953e-06, "data_time": 0.00798654556274414, "loss": 0.2294921875, "time": 1.0319414138793945, "tflops": 7.15286139506319, "tokens_per_sec": 118.22376576713317, "iter": 580, "memory": 16130, "step": 580} +{"base_lr": 1.5875000000000022e-06, "lr": 1.5875000000000022e-06, "data_time": 0.010207414627075195, "loss": 1.3515625, "time": 1.0612356662750244, "tflops": 3.4748923255287996, "tokens_per_sec": 57.48016386789442, "iter": 590, "memory": 16131, "step": 590} +{"base_lr": 1.6141129032258084e-06, "lr": 1.6141129032258084e-06, "data_time": 0.008874893188476562, "loss": 1.90625, "time": 
1.070939064025879, "tflops": 6.609480228367451, "tokens_per_sec": 109.2499133984933, "iter": 600, "memory": 16131, "step": 600} +{"base_lr": 1.6407258064516147e-06, "lr": 1.6407258064516147e-06, "data_time": 0.007974863052368164, "loss": 3.015625, "time": 1.0254802703857422, "tflops": 4.304157035523853, "tokens_per_sec": 71.18615746012286, "iter": 610, "memory": 16130, "step": 610} +{"base_lr": 1.6673387096774214e-06, "lr": 1.6673387096774214e-06, "data_time": 0.008660078048706055, "loss": 2.5625, "time": 1.0109422206878662, "tflops": 7.00173602864142, "tokens_per_sec": 115.73361721926601, "iter": 620, "memory": 16131, "step": 620} +{"base_lr": 1.6939516129032277e-06, "lr": 1.6939516129032277e-06, "data_time": 0.008180856704711914, "loss": 0.427734375, "time": 1.0164997577667236, "tflops": 6.844240171230571, "tokens_per_sec": 113.13332750078057, "iter": 630, "memory": 16131, "step": 630} +{"base_lr": 1.7205645161290337e-06, "lr": 1.7205645161290337e-06, "data_time": 0.008471250534057617, "loss": 2.484375, "time": 1.3150043487548828, "tflops": 4.047012336051175, "tokens_per_sec": 66.91993078445424, "iter": 640, "memory": 16130, "step": 640} +{"base_lr": 1.74717741935484e-06, "lr": 1.74717741935484e-06, "data_time": 0.008368968963623047, "loss": 1.078125, "time": 1.0985722541809082, "tflops": 4.072878147831967, "tokens_per_sec": 67.36015743917253, "iter": 650, "memory": 16131, "step": 650} +{"base_lr": 1.7737903225806463e-06, "lr": 1.7737903225806463e-06, "data_time": 0.008448600769042969, "loss": 1.9296875, "time": 1.1227614879608154, "tflops": 3.9312250802158903, "tokens_per_sec": 65.01826147645944, "iter": 660, "memory": 16130, "step": 660} +{"base_lr": 1.8004032258064532e-06, "lr": 1.8004032258064532e-06, "data_time": 0.008531808853149414, "loss": 1.1796875, "time": 1.0657172203063965, "tflops": 3.9145027106803205, "tokens_per_sec": 64.74513002623489, "iter": 670, "memory": 16130, "step": 670} +{"base_lr": 1.8270161290322595e-06, "lr": 1.8270161290322595e-06, 
"data_time": 0.008015155792236328, "loss": 0.0947265625, "time": 1.3633790016174316, "tflops": 6.747996586591716, "tokens_per_sec": 111.48770798110046, "iter": 680, "memory": 16131, "step": 680} +{"base_lr": 1.8536290322580662e-06, "lr": 1.8536290322580662e-06, "data_time": 0.008182048797607422, "loss": 0.4296875, "time": 1.232914924621582, "tflops": 4.31646881313008, "tokens_per_sec": 71.37556553380065, "iter": 690, "memory": 16131, "step": 690} +{"base_lr": 1.8802419354838724e-06, "lr": 1.8802419354838724e-06, "data_time": 0.008540868759155273, "loss": 1.6953125, "time": 1.2415738105773926, "tflops": 2.6290823986625154, "tokens_per_sec": 43.493185455356745, "iter": 700, "memory": 16130, "step": 700} +{"base_lr": 1.9068548387096791e-06, "lr": 1.9068548387096791e-06, "data_time": 0.008160114288330078, "loss": 0.287109375, "time": 1.068218469619751, "tflops": 6.17257697644279, "tokens_per_sec": 102.03905202902754, "iter": 710, "memory": 16131, "step": 710} +{"base_lr": 1.933467741935486e-06, "lr": 1.933467741935486e-06, "data_time": 0.008492708206176758, "loss": 2.65625, "time": 1.2647275924682617, "tflops": 3.8728146407219652, "tokens_per_sec": 64.04541221549148, "iter": 720, "memory": 16131, "step": 720} +{"base_lr": 1.960080645161292e-06, "lr": 1.960080645161292e-06, "data_time": 0.008059501647949219, "loss": 0.1962890625, "time": 1.3936829566955566, "tflops": 5.035405424368665, "tokens_per_sec": 83.2327032791981, "iter": 730, "memory": 16130, "step": 730} +{"base_lr": 1.9866935483870986e-06, "lr": 1.9866935483870986e-06, "data_time": 0.008306026458740234, "loss": 0.1865234375, "time": 1.0434024333953857, "tflops": 8.178087738039922, "tokens_per_sec": 135.13481997643999, "iter": 740, "memory": 16131, "step": 740} +{"base_lr": 1.9999996061557236e-06, "lr": 1.9999996061557236e-06, "data_time": 0.008544683456420898, "loss": 0.162109375, "time": 1.3108081817626953, "tflops": 6.232245523558006, "tokens_per_sec": 102.98989728486222, "iter": 750, "memory": 16131, 
"step": 750} +{"base_lr": 1.9999951754111564e-06, "lr": 1.9999951754111564e-06, "data_time": 0.008687496185302734, "loss": 2.171875, "time": 1.0354118347167969, "tflops": 4.613612563288863, "tokens_per_sec": 76.29814277864766, "iter": 760, "memory": 16130, "step": 760} +{"base_lr": 1.999985821638561e-06, "lr": 1.999985821638561e-06, "data_time": 0.008340835571289062, "loss": 2.0, "time": 1.0585877895355225, "tflops": 5.427703624463322, "tokens_per_sec": 89.74220271480128, "iter": 770, "memory": 16130, "step": 770} +{"base_lr": 1.9999715448839856e-06, "lr": 1.9999715448839856e-06, "data_time": 0.008420944213867188, "loss": 2.25, "time": 1.0833537578582764, "tflops": 4.968262374229149, "tokens_per_sec": 82.15229730302072, "iter": 780, "memory": 16131, "step": 780} +{"base_lr": 1.999952345217715e-06, "lr": 1.999952345217715e-06, "data_time": 0.008046150207519531, "loss": 0.439453125, "time": 1.01377272605896, "tflops": 5.4884451704443595, "tokens_per_sec": 90.75012341036152, "iter": 790, "memory": 16131, "step": 790} +{"base_lr": 1.999928222734272e-06, "lr": 1.999928222734272e-06, "data_time": 0.008173465728759766, "loss": 1.78125, "time": 1.2392878532409668, "tflops": 3.219763202521218, "tokens_per_sec": 53.25639223150985, "iter": 800, "memory": 16131, "step": 800} +{"base_lr": 1.99989917755241e-06, "lr": 1.99989917755241e-06, "data_time": 0.007458686828613281, "loss": 0.345703125, "time": 1.0087604522705078, "tflops": 6.716567211943395, "tokens_per_sec": 111.02735019776053, "iter": 810, "memory": 16131, "step": 810} +{"base_lr": 1.9998652098151227e-06, "lr": 1.9998652098151227e-06, "data_time": 0.008054971694946289, "loss": 2.296875, "time": 1.05586576461792, "tflops": 7.277778702035496, "tokens_per_sec": 120.28044118452544, "iter": 820, "memory": 16131, "step": 820} +{"base_lr": 1.9998263196896337e-06, "lr": 1.9998263196896337e-06, "data_time": 0.008306264877319336, "loss": 2.421875, "time": 1.0630297660827637, "tflops": 7.456804415404667, "tokens_per_sec": 
123.23267342043324, "iter": 830, "memory": 16131, "step": 830} +{"base_lr": 1.9997825073674015e-06, "lr": 1.9997825073674015e-06, "data_time": 0.008565902709960938, "loss": 2.640625, "time": 1.3298759460449219, "tflops": 3.910703002173417, "tokens_per_sec": 64.66768592642124, "iter": 840, "memory": 16131, "step": 840} +{"base_lr": 1.999733773064116e-06, "lr": 1.999733773064116e-06, "data_time": 0.008158683776855469, "loss": 2.546875, "time": 1.0212271213531494, "tflops": 4.677695048382285, "tokens_per_sec": 77.35791416824675, "iter": 850, "memory": 16131, "step": 850} +{"base_lr": 1.9996801170196976e-06, "lr": 1.9996801170196976e-06, "data_time": 0.008026123046875, "loss": 0.2197265625, "time": 1.0231528282165527, "tflops": 5.734058967840276, "tokens_per_sec": 94.80499620861616, "iter": 860, "memory": 16131, "step": 860} +{"base_lr": 1.9996215394983004e-06, "lr": 1.9996215394983004e-06, "data_time": 0.008556842803955078, "loss": 1.609375, "time": 1.3322386741638184, "tflops": 3.9492124326223377, "tokens_per_sec": 65.3036138997694, "iter": 870, "memory": 16131, "step": 870} +{"base_lr": 1.9995580407883036e-06, "lr": 1.9995580407883036e-06, "data_time": 0.008893251419067383, "loss": 0.0947265625, "time": 1.0334019660949707, "tflops": 6.321917260963756, "tokens_per_sec": 104.50918765715815, "iter": 880, "memory": 16130, "step": 880} +{"base_lr": 1.999489621202313e-06, "lr": 1.999489621202313e-06, "data_time": 0.008881330490112305, "loss": 0.40625, "time": 1.0321481227874756, "tflops": 7.3275753750994905, "tokens_per_sec": 121.10664859060834, "iter": 890, "memory": 16130, "step": 890} +{"base_lr": 1.9994162810771625e-06, "lr": 1.9994162810771625e-06, "data_time": 0.008353233337402344, "loss": 1.2890625, "time": 1.0287361145019531, "tflops": 3.7611152693141823, "tokens_per_sec": 62.21226133479566, "iter": 900, "memory": 16131, "step": 900} +{"base_lr": 1.999338020773911e-06, "lr": 1.999338020773911e-06, "data_time": 0.008432626724243164, "loss": 1.8515625, "time": 
1.036409616470337, "tflops": 4.025197066306239, "tokens_per_sec": 66.57599360658602, "iter": 910, "memory": 16131, "step": 910} +{"base_lr": 1.999254840677837e-06, "lr": 1.999254840677837e-06, "data_time": 0.00848698616027832, "loss": 2.328125, "time": 1.0552129745483398, "tflops": 6.478306247414664, "tokens_per_sec": 107.08738683606502, "iter": 920, "memory": 16130, "step": 920} +{"base_lr": 1.99916674119844e-06, "lr": 1.99916674119844e-06, "data_time": 0.008119344711303711, "loss": 0.259765625, "time": 1.0063934326171875, "tflops": 7.575317188156125, "tokens_per_sec": 125.19954514429223, "iter": 930, "memory": 16131, "step": 930} +{"base_lr": 1.999073722769438e-06, "lr": 1.999073722769438e-06, "data_time": 0.008267879486083984, "loss": 1.703125, "time": 1.0351288318634033, "tflops": 3.8547988466451084, "tokens_per_sec": 63.76017937895257, "iter": 940, "memory": 16131, "step": 940} +{"base_lr": 1.9989757858487664e-06, "lr": 1.9989757858487664e-06, "data_time": 0.008328914642333984, "loss": 2.140625, "time": 1.4201805591583252, "tflops": 3.193168158480085, "tokens_per_sec": 52.810186364187516, "iter": 950, "memory": 16131, "step": 950} +{"base_lr": 1.9988729309185732e-06, "lr": 1.9988729309185732e-06, "data_time": 0.011367082595825195, "loss": 1.8984375, "time": 1.3723738193511963, "tflops": 3.8778347024195456, "tokens_per_sec": 64.1224706848013, "iter": 960, "memory": 16131, "step": 960} +{"base_lr": 1.998765158485219e-06, "lr": 1.998765158485219e-06, "data_time": 0.008350133895874023, "loss": 0.2138671875, "time": 1.039778709411621, "tflops": 6.924107043294889, "tokens_per_sec": 114.44742897959894, "iter": 970, "memory": 16131, "step": 970} +{"base_lr": 1.9986524690792733e-06, "lr": 1.9986524690792733e-06, "data_time": 0.008599042892456055, "loss": 1.8359375, "time": 1.041391134262085, "tflops": 3.5992079140219593, "tokens_per_sec": 59.53574786660037, "iter": 980, "memory": 16131, "step": 980} +{"base_lr": 1.9985348632555117e-06, "lr": 1.9985348632555117e-06, 
"data_time": 0.008408546447753906, "loss": 0.1962890625, "time": 1.0268840789794922, "tflops": 6.893037602160027, "tokens_per_sec": 113.93691108363426, "iter": 990, "memory": 16131, "step": 990} +{"base_lr": 1.9984123415929133e-06, "lr": 1.9984123415929133e-06, "data_time": 0.008064746856689453, "loss": 0.1474609375, "time": 1.015733003616333, "tflops": 7.326664938594112, "tokens_per_sec": 121.09481484008073, "iter": 1000, "memory": 16130, "step": 1000} +{"base_lr": 1.9982849046946588e-06, "lr": 1.9982849046946588e-06, "data_time": 0.008089065551757812, "loss": 2.375, "time": 1.272737979888916, "tflops": 4.895124438664803, "tokens_per_sec": 80.92789060078874, "iter": 1010, "memory": 16131, "step": 1010} +{"base_lr": 1.998152553188127e-06, "lr": 1.998152553188127e-06, "data_time": 0.008710861206054688, "loss": 3.0, "time": 1.0319976806640625, "tflops": 7.03503107390829, "tokens_per_sec": 116.27933109565417, "iter": 1020, "memory": 16131, "step": 1020} +{"base_lr": 1.9980152877248906e-06, "lr": 1.9980152877248906e-06, "data_time": 0.008044242858886719, "loss": 1.71875, "time": 1.3313405513763428, "tflops": 6.454898882018872, "tokens_per_sec": 106.65941171332415, "iter": 1030, "memory": 16131, "step": 1030} +{"base_lr": 1.9978731089807145e-06, "lr": 1.9978731089807145e-06, "data_time": 0.00813603401184082, "loss": 0.21484375, "time": 1.2392292022705078, "tflops": 4.538780498608429, "tokens_per_sec": 75.04664982840215, "iter": 1040, "memory": 16131, "step": 1040} +{"base_lr": 1.997726017655552e-06, "lr": 1.997726017655552e-06, "data_time": 0.007851123809814453, "loss": 1.953125, "time": 1.0009186267852783, "tflops": 7.556201838588546, "tokens_per_sec": 124.8852770392999, "iter": 1050, "memory": 16131, "step": 1050} +{"base_lr": 1.997574014473542e-06, "lr": 1.997574014473542e-06, "data_time": 0.008387088775634766, "loss": 0.1591796875, "time": 1.0371522903442383, "tflops": 7.993546192596577, "tokens_per_sec": 132.09246248147087, "iter": 1060, "memory": 16131, "step": 
1060} +{"base_lr": 1.997417100183004e-06, "lr": 1.997417100183004e-06, "data_time": 0.008855342864990234, "loss": 0.12109375, "time": 1.3419909477233887, "tflops": 5.048772787864959, "tokens_per_sec": 83.45808903548729, "iter": 1070, "memory": 16131, "step": 1070} +{"base_lr": 1.9972552755564346e-06, "lr": 1.9972552755564346e-06, "data_time": 0.008716821670532227, "loss": 1.71875, "time": 1.0529735088348389, "tflops": 5.9167782827357955, "tokens_per_sec": 97.81822537385216, "iter": 1080, "memory": 16131, "step": 1080} +{"base_lr": 1.9970885413905052e-06, "lr": 1.9970885413905052e-06, "data_time": 0.008357524871826172, "loss": 0.23046875, "time": 1.0397582054138184, "tflops": 6.749414126720182, "tokens_per_sec": 111.56439968052095, "iter": 1090, "memory": 16130, "step": 1090} +{"base_lr": 1.996916898506057e-06, "lr": 1.996916898506057e-06, "data_time": 0.008685111999511719, "loss": 0.0751953125, "time": 1.0236825942993164, "tflops": 7.210568922837958, "tokens_per_sec": 119.17756605345681, "iter": 1100, "memory": 16130, "step": 1100} +{"base_lr": 1.996740347748096e-06, "lr": 1.996740347748096e-06, "data_time": 0.008006811141967773, "loss": 0.37890625, "time": 1.0310401916503906, "tflops": 6.0426459024814845, "tokens_per_sec": 99.89911240514061, "iter": 1110, "memory": 16131, "step": 1110} +{"base_lr": 1.9965588899857913e-06, "lr": 1.9965588899857913e-06, "data_time": 0.008271932601928711, "loss": 2.265625, "time": 1.0251178741455078, "tflops": 4.718990480921642, "tokens_per_sec": 78.03980597509955, "iter": 1120, "memory": 16131, "step": 1120} +{"base_lr": 1.996372526112469e-06, "lr": 1.996372526112469e-06, "data_time": 0.008522510528564453, "loss": 2.03125, "time": 1.0514943599700928, "tflops": 6.501216806872774, "tokens_per_sec": 107.46610186583078, "iter": 1130, "memory": 16130, "step": 1130} +{"base_lr": 1.9961812570456086e-06, "lr": 1.9961812570456086e-06, "data_time": 0.00832819938659668, "loss": 0.12158203125, "time": 1.0147068500518799, "tflops": 
6.856333408804165, "tokens_per_sec": 113.33322525023551, "iter": 1140, "memory": 16132, "step": 1140} +{"base_lr": 1.9959850837268362e-06, "lr": 1.9959850837268362e-06, "data_time": 0.008744478225708008, "loss": 0.07080078125, "time": 1.4088335037231445, "tflops": 5.1102838385661356, "tokens_per_sec": 84.46704290140212, "iter": 1150, "memory": 16130, "step": 1150} +{"base_lr": 1.995784007121924e-06, "lr": 1.995784007121924e-06, "data_time": 0.008178234100341797, "loss": 1.5078125, "time": 1.0357067584991455, "tflops": 3.969500184134815, "tokens_per_sec": 65.65564957640511, "iter": 1160, "memory": 16131, "step": 1160} +{"base_lr": 1.995578028220783e-06, "lr": 1.995578028220783e-06, "data_time": 0.008498668670654297, "loss": 1.453125, "time": 1.0115511417388916, "tflops": 5.261067485151875, "tokens_per_sec": 86.99510718621497, "iter": 1170, "memory": 16130, "step": 1170} +{"base_lr": 1.9953671480374565e-06, "lr": 1.9953671480374565e-06, "data_time": 0.009272098541259766, "loss": 0.078125, "time": 1.1071531772613525, "tflops": 5.408414177592238, "tokens_per_sec": 89.41852133306105, "iter": 1180, "memory": 16131, "step": 1180} +{"base_lr": 1.99515136761012e-06, "lr": 1.99515136761012e-06, "data_time": 0.008645057678222656, "loss": 2.546875, "time": 1.114736557006836, "tflops": 4.17670789953224, "tokens_per_sec": 69.07461634404686, "iter": 1190, "memory": 16132, "step": 1190} +{"base_lr": 1.99493068800107e-06, "lr": 1.99493068800107e-06, "data_time": 0.00860905647277832, "loss": 2.015625, "time": 1.1563637256622314, "tflops": 4.235739523765089, "tokens_per_sec": 70.04716440196403, "iter": 1200, "memory": 16130, "step": 1200} +{"base_lr": 1.9947051102967252e-06, "lr": 1.9947051102967252e-06, "data_time": 0.008734703063964844, "loss": 0.111328125, "time": 1.1182172298431396, "tflops": 6.980377152228251, "tokens_per_sec": 115.36220025690392, "iter": 1210, "memory": 16131, "step": 1210} +{"base_lr": 1.9944746356076162e-06, "lr": 1.9944746356076162e-06, "data_time": 
0.008022308349609375, "loss": 0.1171875, "time": 1.1122207641601562, "tflops": 5.982842216094275, "tokens_per_sec": 98.90122855507258, "iter": 1220, "memory": 16130, "step": 1220} +{"base_lr": 1.9942392650683813e-06, "lr": 1.9942392650683813e-06, "data_time": 0.008499622344970703, "loss": 0.039306640625, "time": 1.0257453918457031, "tflops": 9.442225236033488, "tokens_per_sec": 155.98412751525368, "iter": 1230, "memory": 16131, "step": 1230} +{"base_lr": 1.9939989998377628e-06, "lr": 1.9939989998377628e-06, "data_time": 0.008163213729858398, "loss": 2.078125, "time": 1.1214323043823242, "tflops": 3.666060048837964, "tokens_per_sec": 60.63674083064087, "iter": 1240, "memory": 16130, "step": 1240} +{"base_lr": 1.9937538410985985e-06, "lr": 1.9937538410985985e-06, "data_time": 0.008352041244506836, "loss": 2.28125, "time": 1.0246052742004395, "tflops": 6.376193731842515, "tokens_per_sec": 105.40644550573236, "iter": 1250, "memory": 16132, "step": 1250} +{"base_lr": 1.993503790057816e-06, "lr": 1.993503790057816e-06, "data_time": 0.008065462112426758, "loss": 2.015625, "time": 1.104888916015625, "tflops": 4.76183031521409, "tokens_per_sec": 78.74094738288689, "iter": 1260, "memory": 16130, "step": 1260} +{"base_lr": 1.993248847946431e-06, "lr": 1.993248847946431e-06, "data_time": 0.00820612907409668, "loss": 0.2099609375, "time": 1.0216822624206543, "tflops": 6.453729279554388, "tokens_per_sec": 106.68678904304501, "iter": 1270, "memory": 16131, "step": 1270} +{"base_lr": 1.9929890160195366e-06, "lr": 1.9929890160195366e-06, "data_time": 0.00818490982055664, "loss": 0.171875, "time": 1.368915319442749, "tflops": 5.126510471700781, "tokens_per_sec": 84.73862360393186, "iter": 1280, "memory": 16130, "step": 1280} +{"base_lr": 1.9927242955562996e-06, "lr": 1.9927242955562996e-06, "data_time": 0.008594512939453125, "loss": 1.265625, "time": 1.2644636631011963, "tflops": 2.725014304751526, "tokens_per_sec": 45.07840095630581, "iter": 1290, "memory": 16131, "step": 1290} 
+{"base_lr": 1.992454687859951e-06, "lr": 1.992454687859951e-06, "data_time": 0.0079498291015625, "loss": 2.453125, "time": 1.0303597450256348, "tflops": 7.6344119256295855, "tokens_per_sec": 126.16952537935136, "iter": 1300, "memory": 16131, "step": 1300} +{"base_lr": 1.992180194257784e-06, "lr": 1.992180194257784e-06, "data_time": 0.009192705154418945, "loss": 2.53125, "time": 1.0323293209075928, "tflops": 4.744663778733934, "tokens_per_sec": 78.46333370509014, "iter": 1310, "memory": 16130, "step": 1310} +{"base_lr": 1.9919008161011454e-06, "lr": 1.9919008161011454e-06, "data_time": 0.008365869522094727, "loss": 1.6796875, "time": 1.2137665748596191, "tflops": 3.6863355941283884, "tokens_per_sec": 60.96724158720359, "iter": 1320, "memory": 16131, "step": 1320} +{"base_lr": 1.9916165547654275e-06, "lr": 1.9916165547654275e-06, "data_time": 0.008443593978881836, "loss": 0.10400390625, "time": 1.3014419078826904, "tflops": 5.066427238472848, "tokens_per_sec": 83.75325808990415, "iter": 1330, "memory": 16131, "step": 1330} +{"base_lr": 1.9913274116500647e-06, "lr": 1.9913274116500647e-06, "data_time": 0.008383035659790039, "loss": 2.015625, "time": 1.035897970199585, "tflops": 5.54658947827404, "tokens_per_sec": 91.70787349028666, "iter": 1340, "memory": 16131, "step": 1340} +{"base_lr": 1.9910333881785216e-06, "lr": 1.9910333881785216e-06, "data_time": 0.008636951446533203, "loss": 0.08837890625, "time": 1.0283820629119873, "tflops": 8.061716594072363, "tokens_per_sec": 133.21897078983935, "iter": 1350, "memory": 16130, "step": 1350} +{"base_lr": 1.9907344857982933e-06, "lr": 1.9907344857982933e-06, "data_time": 0.008021831512451172, "loss": 2.203125, "time": 1.0295426845550537, "tflops": 4.933906468173672, "tokens_per_sec": 81.58962349018235, "iter": 1360, "memory": 16130, "step": 1360} +{"base_lr": 1.9904307059808903e-06, "lr": 1.9904307059808903e-06, "data_time": 0.009438276290893555, "loss": 0.166015625, "time": 1.0723683834075928, "tflops": 6.14868997763028, 
"tokens_per_sec": 101.644175347222, "iter": 1370, "memory": 16130, "step": 1370} +{"base_lr": 1.9901220502218366e-06, "lr": 1.9901220502218366e-06, "data_time": 0.007943153381347656, "loss": 1.4765625, "time": 1.016953468322754, "tflops": 3.983196007727686, "tokens_per_sec": 65.8830537354243, "iter": 1380, "memory": 16130, "step": 1380} +{"base_lr": 1.9898085200406605e-06, "lr": 1.9898085200406605e-06, "data_time": 0.007818937301635742, "loss": 0.0595703125, "time": 1.3784840106964111, "tflops": 5.090925005687612, "tokens_per_sec": 84.15041386030481, "iter": 1390, "memory": 16131, "step": 1390} +{"base_lr": 1.989490116980887e-06, "lr": 1.989490116980887e-06, "data_time": 0.008350372314453125, "loss": 0.053466796875, "time": 1.0527141094207764, "tflops": 5.803164980011076, "tokens_per_sec": 95.94247773070715, "iter": 1400, "memory": 16133, "step": 1400} +{"base_lr": 1.9891668426100307e-06, "lr": 1.9891668426100307e-06, "data_time": 0.008504390716552734, "loss": 1.671875, "time": 1.0429482460021973, "tflops": 4.754398128569075, "tokens_per_sec": 78.6232685219441, "iter": 1410, "memory": 16130, "step": 1410} +{"base_lr": 1.9888386985195894e-06, "lr": 1.9888386985195894e-06, "data_time": 0.008427619934082031, "loss": 0.11083984375, "time": 1.3761348724365234, "tflops": 4.835457246754789, "tokens_per_sec": 79.93402551100165, "iter": 1420, "memory": 16131, "step": 1420} +{"base_lr": 1.988505686325032e-06, "lr": 1.988505686325032e-06, "data_time": 0.008458375930786133, "loss": 1.9765625, "time": 1.0603210926055908, "tflops": 4.6194078103257326, "tokens_per_sec": 76.39195387585606, "iter": 1430, "memory": 16130, "step": 1430} +{"base_lr": 1.988167807665796e-06, "lr": 1.988167807665796e-06, "data_time": 0.008056879043579102, "loss": 0.06640625, "time": 1.0641679763793945, "tflops": 6.765416029337902, "tokens_per_sec": 111.82445125323201, "iter": 1440, "memory": 16131, "step": 1440} +{"base_lr": 1.9878250642052748e-06, "lr": 1.9878250642052748e-06, "data_time": 
0.008192062377929688, "loss": 2.125, "time": 1.0262985229492188, "tflops": 4.3596973276449225, "tokens_per_sec": 72.1037771615203, "iter": 1450, "memory": 16131, "step": 1450} +{"base_lr": 1.9874774576308116e-06, "lr": 1.9874774576308116e-06, "data_time": 0.007958173751831055, "loss": 0.255859375, "time": 1.0145182609558105, "tflops": 6.021649770994188, "tokens_per_sec": 99.55463976049585, "iter": 1460, "memory": 16131, "step": 1460} +{"base_lr": 1.987124989653693e-06, "lr": 1.987124989653693e-06, "data_time": 0.008524656295776367, "loss": 1.9921875, "time": 1.021106481552124, "tflops": 3.7892180475310218, "tokens_per_sec": 62.677106801491135, "iter": 1470, "memory": 16131, "step": 1470} +{"base_lr": 1.9867676620091357e-06, "lr": 1.9867676620091357e-06, "data_time": 0.008939743041992188, "loss": 1.015625, "time": 1.042874813079834, "tflops": 3.88419103066211, "tokens_per_sec": 64.24548676371838, "iter": 1480, "memory": 16131, "step": 1480} +{"base_lr": 1.986405476456283e-06, "lr": 1.986405476456283e-06, "data_time": 0.00806427001953125, "loss": 1.5703125, "time": 1.018507480621338, "tflops": 4.690185531001814, "tokens_per_sec": 77.56447694594121, "iter": 1490, "memory": 16131, "step": 1490} +{"base_lr": 1.986038434778193e-06, "lr": 1.986038434778193e-06, "data_time": 0.008204936981201172, "loss": 2.359375, "time": 1.0337293148040771, "tflops": 6.319915313814909, "tokens_per_sec": 104.47609297059044, "iter": 1500, "memory": 16130, "step": 1500} +{"base_lr": 1.9856665387818316e-06, "lr": 1.9856665387818316e-06, "data_time": 0.008387565612792969, "loss": 0.1669921875, "time": 1.023531436920166, "tflops": 7.2708420241216185, "tokens_per_sec": 120.17217602029906, "iter": 1510, "memory": 16131, "step": 1510} +{"base_lr": 1.985289790298061e-06, "lr": 1.985289790298061e-06, "data_time": 0.008054494857788086, "loss": 1.7890625, "time": 1.0553884506225586, "tflops": 6.534636449619352, "tokens_per_sec": 108.01710018017063, "iter": 1520, "memory": 16131, "step": 1520} 
+{"base_lr": 1.984908191181634e-06, "lr": 1.984908191181634e-06, "data_time": 0.008415937423706055, "loss": 1.7265625, "time": 1.4113469123840332, "tflops": 2.7843639508299196, "tokens_per_sec": 46.05529613563017, "iter": 1530, "memory": 16130, "step": 1530} +{"base_lr": 1.9845217433111825e-06, "lr": 1.9845217433111825e-06, "data_time": 0.008168458938598633, "loss": 0.08251953125, "time": 1.0408711433410645, "tflops": 6.218353126218738, "tokens_per_sec": 102.79850746600655, "iter": 1540, "memory": 16131, "step": 1540} +{"base_lr": 1.9841304485892094e-06, "lr": 1.9841304485892094e-06, "data_time": 0.007993936538696289, "loss": 1.90625, "time": 1.0413732528686523, "tflops": 4.470951189424084, "tokens_per_sec": 73.94082744857862, "iter": 1550, "memory": 16130, "step": 1550} +{"base_lr": 1.9837343089420786e-06, "lr": 1.9837343089420786e-06, "data_time": 0.008038520812988281, "loss": 0.01202392578125, "time": 1.399256944656372, "tflops": 5.88162225025455, "tokens_per_sec": 97.19444346463581, "iter": 1560, "memory": 16130, "step": 1560} +{"base_lr": 1.9833333263200066e-06, "lr": 1.9833333263200066e-06, "data_time": 0.008396148681640625, "loss": 2.1875, "time": 1.2094852924346924, "tflops": 5.351453522148669, "tokens_per_sec": 88.46738415852967, "iter": 1570, "memory": 16130, "step": 1570} +{"base_lr": 1.982927502697052e-06, "lr": 1.982927502697052e-06, "data_time": 0.008539676666259766, "loss": 2.734375, "time": 1.0442323684692383, "tflops": 7.416899838799375, "tokens_per_sec": 122.57808114827473, "iter": 1580, "memory": 16131, "step": 1580} +{"base_lr": 1.9825168400711044e-06, "lr": 1.9825168400711044e-06, "data_time": 0.008669137954711914, "loss": 0.048828125, "time": 1.0526227951049805, "tflops": 6.148930422450459, "tokens_per_sec": 101.6508482406815, "iter": 1590, "memory": 16131, "step": 1590} +{"base_lr": 1.9821013404638783e-06, "lr": 1.9821013404638783e-06, "data_time": 0.007944107055664062, "loss": 0.08642578125, "time": 1.3017616271972656, "tflops": 
5.8564865555058025, "tokens_per_sec": 96.79191440846603, "iter": 1600, "memory": 16131, "step": 1600} +{"base_lr": 1.9816810059208993e-06, "lr": 1.9816810059208993e-06, "data_time": 0.00821828842163086, "loss": 2.0625, "time": 1.0435476303100586, "tflops": 7.8283713994012105, "tokens_per_sec": 129.36639984488247, "iter": 1610, "memory": 16131, "step": 1610} +{"base_lr": 1.981255838511497e-06, "lr": 1.981255838511497e-06, "data_time": 0.007716178894042969, "loss": 0.1572265625, "time": 1.0105760097503662, "tflops": 6.045140192139556, "tokens_per_sec": 99.9430018379807, "iter": 1620, "memory": 16130, "step": 1620} +{"base_lr": 1.980825840328791e-06, "lr": 1.980825840328791e-06, "data_time": 0.008075237274169922, "loss": 0.07568359375, "time": 1.0496459007263184, "tflops": 6.628109985795451, "tokens_per_sec": 109.56075750909372, "iter": 1630, "memory": 16131, "step": 1630} +{"base_lr": 1.980391013489685e-06, "lr": 1.980391013489685e-06, "data_time": 0.008698463439941406, "loss": 0.068359375, "time": 1.0035240650177002, "tflops": 6.208332222964581, "tokens_per_sec": 102.63829597158752, "iter": 1640, "memory": 16130, "step": 1640} +{"base_lr": 1.9799513601348543e-06, "lr": 1.9799513601348543e-06, "data_time": 0.008211135864257812, "loss": 0.0498046875, "time": 1.2908720970153809, "tflops": 5.530326390256934, "tokens_per_sec": 91.41107029328143, "iter": 1650, "memory": 16130, "step": 1650} +{"base_lr": 1.9795068824287355e-06, "lr": 1.9795068824287355e-06, "data_time": 0.008387088775634766, "loss": 1.9140625, "time": 1.349269151687622, "tflops": 4.797044622336176, "tokens_per_sec": 79.30219101658743, "iter": 1660, "memory": 16131, "step": 1660} +{"base_lr": 1.9790575825595147e-06, "lr": 1.9790575825595147e-06, "data_time": 0.008468151092529297, "loss": 1.8046875, "time": 1.0332763195037842, "tflops": 4.037403034370695, "tokens_per_sec": 66.7778779959551, "iter": 1670, "memory": 16131, "step": 1670} +{"base_lr": 1.978603462739118e-06, "lr": 1.978603462739118e-06, 
"data_time": 0.008030176162719727, "loss": 2.828125, "time": 1.0140016078948975, "tflops": 5.9649880703354565, "tokens_per_sec": 98.61917300851707, "iter": 1680, "memory": 16130, "step": 1680} +{"base_lr": 1.978144525203202e-06, "lr": 1.978144525203202e-06, "data_time": 0.008376359939575195, "loss": 0.041015625, "time": 1.2197661399841309, "tflops": 5.604347080383238, "tokens_per_sec": 92.6407089816229, "iter": 1690, "memory": 16131, "step": 1690} +{"base_lr": 1.9776807722111397e-06, "lr": 1.9776807722111397e-06, "data_time": 0.008318185806274414, "loss": 0.061279296875, "time": 1.0115535259246826, "tflops": 7.416847730925907, "tokens_per_sec": 122.5837257465208, "iter": 1700, "memory": 16131, "step": 1700} +{"base_lr": 1.9772122060460107e-06, "lr": 1.9772122060460107e-06, "data_time": 0.008399248123168945, "loss": 0.1455078125, "time": 1.034928798675537, "tflops": 6.546737695478533, "tokens_per_sec": 108.22000522473155, "iter": 1710, "memory": 16130, "step": 1710} +{"base_lr": 1.97673882901459e-06, "lr": 1.97673882901459e-06, "data_time": 0.008899688720703125, "loss": 0.007568359375, "time": 1.006026268005371, "tflops": 8.120343059460017, "tokens_per_sec": 134.19132709877218, "iter": 1720, "memory": 16131, "step": 1720} +{"base_lr": 1.9762606434473385e-06, "lr": 1.9762606434473385e-06, "data_time": 0.008356332778930664, "loss": 0.2197265625, "time": 0.9961123466491699, "tflops": 7.957741995766927, "tokens_per_sec": 131.51127023025106, "iter": 1730, "memory": 16131, "step": 1730} +{"base_lr": 1.9757776516983885e-06, "lr": 1.9757776516983885e-06, "data_time": 0.009429216384887695, "loss": 1.8671875, "time": 1.336113691329956, "tflops": 3.6658972723802057, "tokens_per_sec": 60.62358355097213, "iter": 1740, "memory": 16131, "step": 1740} +{"base_lr": 1.9752898561455326e-06, "lr": 1.9752898561455326e-06, "data_time": 0.008620262145996094, "loss": 1.9296875, "time": 1.0479419231414795, "tflops": 4.269655435154932, "tokens_per_sec": 70.61460026152504, "iter": 1750, 
"memory": 16131, "step": 1750} +{"base_lr": 1.974797259190213e-06, "lr": 1.974797259190213e-06, "data_time": 0.008453130722045898, "loss": 0.03857421875, "time": 1.1057560443878174, "tflops": 4.9771234274752985, "tokens_per_sec": 82.2966335673962, "iter": 1760, "memory": 16131, "step": 1760} +{"base_lr": 1.9742998632575115e-06, "lr": 1.9742998632575115e-06, "data_time": 0.008532524108886719, "loss": 0.01092529296875, "time": 1.3153891563415527, "tflops": 5.519382394687554, "tokens_per_sec": 91.2277552398719, "iter": 1770, "memory": 16131, "step": 1770} +{"base_lr": 1.9737976707961333e-06, "lr": 1.9737976707961333e-06, "data_time": 0.008144617080688477, "loss": 2.3125, "time": 1.0543324947357178, "tflops": 3.8993753764581034, "tokens_per_sec": 64.49578319881014, "iter": 1780, "memory": 16131, "step": 1780} +{"base_lr": 1.973290684278398e-06, "lr": 1.973290684278398e-06, "data_time": 0.0085906982421875, "loss": 0.021240234375, "time": 1.0316658020019531, "tflops": 8.564995211554669, "tokens_per_sec": 141.5186969622766, "iter": 1790, "memory": 16131, "step": 1790} +{"base_lr": 1.9727789062002262e-06, "lr": 1.9727789062002262e-06, "data_time": 0.008134126663208008, "loss": 0.004638671875, "time": 1.043532133102417, "tflops": 6.899202101137891, "tokens_per_sec": 114.03577927792162, "iter": 1800, "memory": 16130, "step": 1800} +{"base_lr": 1.972262339081129e-06, "lr": 1.972262339081129e-06, "data_time": 0.008461713790893555, "loss": 0.06005859375, "time": 1.411794900894165, "tflops": 4.498779775654983, "tokens_per_sec": 74.37340929155043, "iter": 1810, "memory": 16131, "step": 1810} +{"base_lr": 1.9717409854641914e-06, "lr": 1.9717409854641914e-06, "data_time": 0.008640527725219727, "loss": 0.03857421875, "time": 1.2722463607788086, "tflops": 5.373167506005642, "tokens_per_sec": 88.81927548272802, "iter": 1820, "memory": 16131, "step": 1820} +{"base_lr": 1.9712148479160645e-06, "lr": 1.9712148479160645e-06, "data_time": 0.008591890335083008, "loss": 0.004364013671875, 
"time": 1.034998893737793, "tflops": 7.248837190897952, "tokens_per_sec": 119.80689134078861, "iter": 1830, "memory": 16130, "step": 1830} +{"base_lr": 1.970683929026952e-06, "lr": 1.970683929026952e-06, "data_time": 0.008215188980102539, "loss": 0.042236328125, "time": 1.2417731285095215, "tflops": 6.041794834513067, "tokens_per_sec": 99.85720994682431, "iter": 1840, "memory": 16130, "step": 1840} +{"base_lr": 1.9701482314105926e-06, "lr": 1.9701482314105926e-06, "data_time": 0.008553028106689453, "loss": 2.078125, "time": 1.1174554824829102, "tflops": 4.166545385058808, "tokens_per_sec": 68.90654814171417, "iter": 1850, "memory": 16131, "step": 1850} +{"base_lr": 1.969607757704257e-06, "lr": 1.969607757704257e-06, "data_time": 0.008233070373535156, "loss": 0.051513671875, "time": 1.0497658252716064, "tflops": 6.338786404779376, "tokens_per_sec": 104.78527434576645, "iter": 1860, "memory": 16131, "step": 1860} +{"base_lr": 1.9690625105687217e-06, "lr": 1.9690625105687217e-06, "data_time": 0.00906682014465332, "loss": 0.059326171875, "time": 1.0116541385650635, "tflops": 6.158439482298577, "tokens_per_sec": 101.8134519233955, "iter": 1870, "memory": 16131, "step": 1870} +{"base_lr": 1.9685124926882688e-06, "lr": 1.9685124926882688e-06, "data_time": 0.008533000946044922, "loss": 0.408203125, "time": 1.0284817218780518, "tflops": 5.822118967493109, "tokens_per_sec": 96.25839516051435, "iter": 1880, "memory": 16131, "step": 1880} +{"base_lr": 1.9679577067706638e-06, "lr": 1.9679577067706638e-06, "data_time": 0.00857853889465332, "loss": 1.8203125, "time": 1.050865888595581, "tflops": 4.488163774803628, "tokens_per_sec": 74.22450461701452, "iter": 1890, "memory": 16131, "step": 1890} +{"base_lr": 1.967398155547147e-06, "lr": 1.967398155547147e-06, "data_time": 0.00832056999206543, "loss": 2.140625, "time": 1.347440242767334, "tflops": 4.174277387634056, "tokens_per_sec": 69.01975838938152, "iter": 1900, "memory": 16131, "step": 1900} +{"base_lr": 1.966833841772419e-06, 
"lr": 1.966833841772419e-06, "data_time": 0.008473634719848633, "loss": 0.125, "time": 1.3213961124420166, "tflops": 4.531527589862061, "tokens_per_sec": 74.9207592392317, "iter": 1910, "memory": 16131, "step": 1910} +{"base_lr": 1.966264768224624e-06, "lr": 1.966264768224624e-06, "data_time": 0.008721590042114258, "loss": 0.005767822265625, "time": 1.3417577743530273, "tflops": 4.823899255070914, "tokens_per_sec": 79.74613752583906, "iter": 1920, "memory": 16130, "step": 1920} +{"base_lr": 1.9656909377053414e-06, "lr": 1.9656909377053414e-06, "data_time": 0.00816488265991211, "loss": 0.06396484375, "time": 1.0200226306915283, "tflops": 5.573547179685769, "tokens_per_sec": 92.15481811043757, "iter": 1930, "memory": 16131, "step": 1930} +{"base_lr": 1.965112353039568e-06, "lr": 1.965112353039568e-06, "data_time": 0.008690118789672852, "loss": 2.546875, "time": 1.029493808746338, "tflops": 4.346165940828995, "tokens_per_sec": 71.8799854561936, "iter": 1940, "memory": 16131, "step": 1940} +{"base_lr": 1.964529017075708e-06, "lr": 1.964529017075708e-06, "data_time": 0.008861780166625977, "loss": 0.0294189453125, "time": 1.3012828826904297, "tflops": 6.510810504351376, "tokens_per_sec": 107.5861381580917, "iter": 1950, "memory": 16131, "step": 1950} +{"base_lr": 1.963940932685552e-06, "lr": 1.963940932685552e-06, "data_time": 0.008187532424926758, "loss": 0.0038299560546875, "time": 1.0201799869537354, "tflops": 7.651177343452756, "tokens_per_sec": 126.44827545095102, "iter": 1960, "memory": 16131, "step": 1960} +{"base_lr": 1.9633481027642703e-06, "lr": 1.9633481027642703e-06, "data_time": 0.008329153060913086, "loss": 0.04248046875, "time": 1.0294301509857178, "tflops": 7.641305937639564, "tokens_per_sec": 126.28345874209518, "iter": 1970, "memory": 16131, "step": 1970} +{"base_lr": 1.9627505302303955e-06, "lr": 1.9627505302303955e-06, "data_time": 0.008417129516601562, "loss": 1.7734375, "time": 1.015392780303955, "tflops": 5.241162754552657, "tokens_per_sec": 
86.6659697674537, "iter": 1980, "memory": 16131, "step": 1980} +{"base_lr": 1.962148218025809e-06, "lr": 1.962148218025809e-06, "data_time": 0.008054018020629883, "loss": 0.0201416015625, "time": 1.0272703170776367, "tflops": 7.244378875600594, "tokens_per_sec": 119.73479419690507, "iter": 1990, "memory": 16131, "step": 1990} +{"base_lr": 1.961541169115725e-06, "lr": 1.961541169115725e-06, "data_time": 0.008653402328491211, "loss": 0.016845703125, "time": 0.9910385608673096, "tflops": 6.714412137111812, "tokens_per_sec": 110.99467199704345, "iter": 2000, "memory": 16131, "step": 2000} +{"base_lr": 1.960929386488676e-06, "lr": 1.960929386488676e-06, "data_time": 0.008402109146118164, "loss": 0.049072265625, "time": 1.030564546585083, "tflops": 6.868420413762745, "tokens_per_sec": 113.53000681770202, "iter": 2010, "memory": 16131, "step": 2010} +{"base_lr": 1.9603128731564996e-06, "lr": 1.9603128731564996e-06, "data_time": 0.008638620376586914, "loss": 2.34375, "time": 1.0739595890045166, "tflops": 6.816582391576742, "tokens_per_sec": 112.66718155759067, "iter": 2020, "memory": 16131, "step": 2020} +{"base_lr": 1.9596916321543232e-06, "lr": 1.9596916321543232e-06, "data_time": 0.008850574493408203, "loss": 1.71875, "time": 1.4093282222747803, "tflops": 3.9480058188909366, "tokens_per_sec": 65.27932850974813, "iter": 2030, "memory": 16131, "step": 2030} +{"base_lr": 1.9590656665405487e-06, "lr": 1.9590656665405487e-06, "data_time": 0.00847172737121582, "loss": 0.0157470703125, "time": 1.2417936325073242, "tflops": 5.261004369767701, "tokens_per_sec": 86.97097260987609, "iter": 2040, "memory": 16131, "step": 2040} +{"base_lr": 1.9584349793968363e-06, "lr": 1.9584349793968363e-06, "data_time": 0.008553266525268555, "loss": 0.146484375, "time": 1.0497627258300781, "tflops": 6.107978857759302, "tokens_per_sec": 100.97519886323046, "iter": 2050, "memory": 16130, "step": 2050} +{"base_lr": 1.9577995738280926e-06, "lr": 1.9577995738280926e-06, "data_time": 
0.008636713027954102, "loss": 0.11376953125, "time": 1.039635181427002, "tflops": 7.041637445959195, "tokens_per_sec": 116.38698089631707, "iter": 2060, "memory": 16131, "step": 2060} +{"base_lr": 1.9571594529624513e-06, "lr": 1.9571594529624513e-06, "data_time": 0.008453845977783203, "loss": 0.0040283203125, "time": 1.0348422527313232, "tflops": 8.655903132023527, "tokens_per_sec": 143.01696670118696, "iter": 2070, "memory": 16131, "step": 2070} +{"base_lr": 1.9565146199512604e-06, "lr": 1.9565146199512604e-06, "data_time": 0.008038759231567383, "loss": 2.796875, "time": 1.0240073204040527, "tflops": 5.847558724778872, "tokens_per_sec": 96.67899635799469, "iter": 2080, "memory": 16131, "step": 2080} +{"base_lr": 1.9558650779690663e-06, "lr": 1.9558650779690663e-06, "data_time": 0.008734464645385742, "loss": 0.00089263916015625, "time": 1.0192437171936035, "tflops": 6.053156192842181, "tokens_per_sec": 100.0742003892335, "iter": 2090, "memory": 16130, "step": 2090} +{"base_lr": 1.9552108302135985e-06, "lr": 1.9552108302135985e-06, "data_time": 0.008261680603027344, "loss": 2.46875, "time": 1.0047180652618408, "tflops": 6.1406693400481265, "tokens_per_sec": 101.52101721522855, "iter": 2100, "memory": 16131, "step": 2100} +{"base_lr": 1.954551879905752e-06, "lr": 1.954551879905752e-06, "data_time": 0.00841069221496582, "loss": 2.375, "time": 1.0048935413360596, "tflops": 3.6095144301692534, "tokens_per_sec": 59.70781732776099, "iter": 2110, "memory": 16131, "step": 2110} +{"base_lr": 1.953888230289574e-06, "lr": 1.953888230289574e-06, "data_time": 0.008430957794189453, "loss": 2.015625, "time": 1.0021629333496094, "tflops": 7.9096969062083495, "tokens_per_sec": 130.71726726313602, "iter": 2120, "memory": 16131, "step": 2120} +{"base_lr": 1.9532198846322444e-06, "lr": 1.9532198846322444e-06, "data_time": 0.008635997772216797, "loss": 2.875, "time": 1.0552515983581543, "tflops": 5.846607035085642, "tokens_per_sec": 96.65941293868038, "iter": 2130, "memory": 16130, 
"step": 2130} +{"base_lr": 1.952546846224065e-06, "lr": 1.952546846224065e-06, "data_time": 0.008285284042358398, "loss": 2.0625, "time": 0.9988217353820801, "tflops": 4.722022004824602, "tokens_per_sec": 78.09201305584776, "iter": 2140, "memory": 16131, "step": 2140} +{"base_lr": 1.9518691183784373e-06, "lr": 1.9518691183784373e-06, "data_time": 0.008579730987548828, "loss": 0.0068359375, "time": 1.2018277645111084, "tflops": 5.536767861359354, "tokens_per_sec": 91.52725810478789, "iter": 2150, "memory": 16131, "step": 2150} +{"base_lr": 1.951186704431853e-06, "lr": 1.951186704431853e-06, "data_time": 0.008121490478515625, "loss": 0.208984375, "time": 0.9995090961456299, "tflops": 7.627493834274802, "tokens_per_sec": 126.06188426474867, "iter": 2160, "memory": 16131, "step": 2160} +{"base_lr": 1.9504996077438687e-06, "lr": 1.9504996077438687e-06, "data_time": 0.008794546127319336, "loss": 1.5078125, "time": 1.233311653137207, "tflops": 4.315080302561307, "tokens_per_sec": 71.35260562574005, "iter": 2170, "memory": 16130, "step": 2170} +{"base_lr": 1.9498078316970976e-06, "lr": 1.9498078316970976e-06, "data_time": 0.008413553237915039, "loss": 0.01287841796875, "time": 1.0094234943389893, "tflops": 7.432498367176929, "tokens_per_sec": 122.8423953824032, "iter": 2180, "memory": 16130, "step": 2180} +{"base_lr": 1.9491113796971907e-06, "lr": 1.9491113796971907e-06, "data_time": 0.007963180541992188, "loss": 2.453125, "time": 1.2353253364562988, "tflops": 5.8771042230064054, "tokens_per_sec": 97.14040217465255, "iter": 2190, "memory": 16131, "step": 2190} +{"base_lr": 1.948410255172815e-06, "lr": 1.948410255172815e-06, "data_time": 0.008362770080566406, "loss": 0.04443359375, "time": 1.0008857250213623, "tflops": 6.043154920872571, "tokens_per_sec": 99.91150587922087, "iter": 2200, "memory": 16130, "step": 2200} +{"base_lr": 1.9477044615756444e-06, "lr": 1.9477044615756444e-06, "data_time": 0.008219003677368164, "loss": 2.09375, "time": 1.0406105518341064, "tflops": 
4.474228110930948, "tokens_per_sec": 73.9950213499289, "iter": 2210, "memory": 16131, "step": 2210} +{"base_lr": 1.946994002380337e-06, "lr": 1.946994002380337e-06, "data_time": 0.008624076843261719, "loss": 2.09375, "time": 1.3004331588745117, "tflops": 5.256704474751399, "tokens_per_sec": 86.89412387616402, "iter": 2220, "memory": 16130, "step": 2220} +{"base_lr": 1.946278881084519e-06, "lr": 1.946278881084519e-06, "data_time": 0.008133649826049805, "loss": 0.00823974609375, "time": 1.0414581298828125, "tflops": 8.135138627901632, "tokens_per_sec": 134.42691163745462, "iter": 2230, "memory": 16131, "step": 2230} +{"base_lr": 1.945559101208772e-06, "lr": 1.945559101208772e-06, "data_time": 0.008620023727416992, "loss": 0.009033203125, "time": 1.3635048866271973, "tflops": 4.613684796677506, "tokens_per_sec": 76.27402073870152, "iter": 2240, "memory": 16131, "step": 2240} +{"base_lr": 1.944834666296607e-06, "lr": 1.944834666296607e-06, "data_time": 0.008150100708007812, "loss": 0.0286865234375, "time": 1.0568130016326904, "tflops": 5.666038297613151, "tokens_per_sec": 93.67787853381756, "iter": 2250, "memory": 16130, "step": 2250} +{"base_lr": 1.944105579914456e-06, "lr": 1.944105579914456e-06, "data_time": 0.008428812026977539, "loss": 1.6796875, "time": 1.0209300518035889, "tflops": 4.797640668900118, "tokens_per_sec": 79.33942179175249, "iter": 2260, "memory": 16131, "step": 2260} +{"base_lr": 1.9433718456516484e-06, "lr": 1.9433718456516484e-06, "data_time": 0.00836324691772461, "loss": 0.2236328125, "time": 1.0054993629455566, "tflops": 7.22042799749231, "tokens_per_sec": 119.3436857566444, "iter": 2270, "memory": 16131, "step": 2270} +{"base_lr": 1.9426334671203958e-06, "lr": 1.9426334671203958e-06, "data_time": 0.008779764175415039, "loss": 2.109375, "time": 1.017517328262329, "tflops": 6.242010991950691, "tokens_per_sec": 103.19234580428339, "iter": 2280, "memory": 16131, "step": 2280} +{"base_lr": 1.9418904479557744e-06, "lr": 1.9418904479557744e-06, 
"data_time": 0.008271932601928711, "loss": 0.0004825592041015625, "time": 1.0330626964569092, "tflops": 8.377325270043384, "tokens_per_sec": 138.42335077077902, "iter": 2290, "memory": 16131, "step": 2290} +{"base_lr": 1.941142791815707e-06, "lr": 1.941142791815707e-06, "data_time": 0.014037847518920898, "loss": 0.1435546875, "time": 1.0566141605377197, "tflops": 6.641742068289111, "tokens_per_sec": 109.784635046777, "iter": 2300, "memory": 16131, "step": 2300} +{"base_lr": 1.9403905023809418e-06, "lr": 1.9403905023809418e-06, "data_time": 0.008298873901367188, "loss": 0.0283203125, "time": 1.0288848876953125, "tflops": 6.938525494878435, "tokens_per_sec": 114.6872710553691, "iter": 2310, "memory": 16131, "step": 2310} +{"base_lr": 1.939633583355039e-06, "lr": 1.939633583355039e-06, "data_time": 0.008645296096801758, "loss": 0.06298828125, "time": 1.221311092376709, "tflops": 4.50621004586599, "tokens_per_sec": 74.51009048221832, "iter": 2320, "memory": 16131, "step": 2320} +{"base_lr": 1.9388720384643504e-06, "lr": 1.9388720384643504e-06, "data_time": 0.008362531661987305, "loss": 0.00112152099609375, "time": 1.0186176300048828, "tflops": 6.11633905191688, "tokens_per_sec": 101.1174330444341, "iter": 2330, "memory": 16131, "step": 2330} +{"base_lr": 1.938105871458002e-06, "lr": 1.938105871458002e-06, "data_time": 0.008234500885009766, "loss": 0.0157470703125, "time": 1.001795768737793, "tflops": 7.549585857725861, "tokens_per_sec": 124.77593128324776, "iter": 2340, "memory": 16131, "step": 2340} +{"base_lr": 1.9373350861078727e-06, "lr": 1.9373350861078727e-06, "data_time": 0.009712457656860352, "loss": 0.00653076171875, "time": 1.0402562618255615, "tflops": 6.454970571702782, "tokens_per_sec": 106.7044766499148, "iter": 2350, "memory": 16130, "step": 2350} +{"base_lr": 1.9365596862085775e-06, "lr": 1.9365596862085775e-06, "data_time": 0.008053064346313477, "loss": 1.78125, "time": 0.9974122047424316, "tflops": 4.0612346388061145, "tokens_per_sec": 
67.17383212413637, "iter": 2360, "memory": 16130, "step": 2360} +{"base_lr": 1.935779675577452e-06, "lr": 1.935779675577452e-06, "data_time": 0.009476423263549805, "loss": 0.0081787109375, "time": 1.0226733684539795, "tflops": 5.855186147771094, "tokens_per_sec": 96.80510224840005, "iter": 2370, "memory": 16131, "step": 2370} +{"base_lr": 1.9349950580545288e-06, "lr": 1.9349950580545288e-06, "data_time": 0.008824348449707031, "loss": 0.275390625, "time": 1.0103471279144287, "tflops": 6.466175383162818, "tokens_per_sec": 106.8939545785893, "iter": 2380, "memory": 16131, "step": 2380} +{"base_lr": 1.93420583750252e-06, "lr": 1.93420583750252e-06, "data_time": 0.009033918380737305, "loss": 0.00653076171875, "time": 1.0302603244781494, "tflops": 6.458796076422789, "tokens_per_sec": 106.76913143831949, "iter": 2390, "memory": 16130, "step": 2390} +{"base_lr": 1.933412017806799e-06, "lr": 1.933412017806799e-06, "data_time": 0.008414983749389648, "loss": 0.010009765625, "time": 1.2150492668151855, "tflops": 5.875435852310991, "tokens_per_sec": 97.11540364877337, "iter": 2400, "memory": 16131, "step": 2400} +{"base_lr": 1.932613602875382e-06, "lr": 1.932613602875382e-06, "data_time": 0.008327722549438477, "loss": 2.046875, "time": 1.2604632377624512, "tflops": 4.65449405769422, "tokens_per_sec": 76.95583424718953, "iter": 2410, "memory": 16131, "step": 2410} +{"base_lr": 1.931810596638906e-06, "lr": 1.931810596638906e-06, "data_time": 0.008376359939575195, "loss": 2.25, "time": 1.0331013202667236, "tflops": 6.968860598732243, "tokens_per_sec": 115.18715315276305, "iter": 2420, "memory": 16131, "step": 2420} +{"base_lr": 1.931003003050614e-06, "lr": 1.931003003050614e-06, "data_time": 0.008258581161499023, "loss": 1.7734375, "time": 1.060689926147461, "tflops": 5.359855049397604, "tokens_per_sec": 88.62156383565309, "iter": 2430, "memory": 16130, "step": 2430} +{"base_lr": 1.9301908260863293e-06, "lr": 1.9301908260863293e-06, "data_time": 0.008459091186523438, "loss": 
0.0216064453125, "time": 1.0250370502471924, "tflops": 6.550810583377585, "tokens_per_sec": 108.28876865779978, "iter": 2440, "memory": 16131, "step": 2440} +{"base_lr": 1.9293740697444424e-06, "lr": 1.9293740697444424e-06, "data_time": 0.008559942245483398, "loss": 2.046875, "time": 1.3771519660949707, "tflops": 3.1171485998378516, "tokens_per_sec": 51.55567558842098, "iter": 2450, "memory": 16130, "step": 2450} +{"base_lr": 1.9285527380458867e-06, "lr": 1.9285527380458867e-06, "data_time": 0.008121728897094727, "loss": 0.0022735595703125, "time": 1.2124311923980713, "tflops": 5.388414425439525, "tokens_per_sec": 89.07721995035232, "iter": 2460, "memory": 16130, "step": 2460} +{"base_lr": 1.92772683503412e-06, "lr": 1.92772683503412e-06, "data_time": 0.008621931076049805, "loss": 2.953125, "time": 1.0254237651824951, "tflops": 3.065288883556872, "tokens_per_sec": 50.710742003034056, "iter": 2470, "memory": 16131, "step": 2470} +{"base_lr": 1.9268963647751064e-06, "lr": 1.9268963647751064e-06, "data_time": 0.008876323699951172, "loss": 0.00115203857421875, "time": 1.015864372253418, "tflops": 7.743347379791712, "tokens_per_sec": 127.96983883931522, "iter": 2480, "memory": 16131, "step": 2480} +{"base_lr": 1.9260613313572934e-06, "lr": 1.9260613313572934e-06, "data_time": 0.008402824401855469, "loss": 1.703125, "time": 1.027975082397461, "tflops": 4.646989144606869, "tokens_per_sec": 76.85011179033445, "iter": 2490, "memory": 16131, "step": 2490} +{"base_lr": 1.925221738891594e-06, "lr": 1.925221738891594e-06, "data_time": 0.00833892822265625, "loss": 1.8984375, "time": 1.0083560943603516, "tflops": 4.677373637894455, "tokens_per_sec": 77.3536258035925, "iter": 2500, "memory": 16131, "step": 2500} +{"base_lr": 1.9243775915113664e-06, "lr": 1.9243775915113664e-06, "data_time": 0.008102178573608398, "loss": 0.05810546875, "time": 1.0337862968444824, "tflops": 6.729793669532332, "tokens_per_sec": 111.24155964430312, "iter": 2510, "memory": 16130, "step": 2510} 
+{"base_lr": 1.9235288933723904e-06, "lr": 1.9235288933723904e-06, "data_time": 0.00839996337890625, "loss": 1.875, "time": 1.0076220035552979, "tflops": 4.500571965275297, "tokens_per_sec": 74.43267389486854, "iter": 2520, "memory": 16131, "step": 2520} +{"base_lr": 1.9226756486528515e-06, "lr": 1.9226756486528515e-06, "data_time": 0.00860142707824707, "loss": 2.421875, "time": 1.0062429904937744, "tflops": 7.0344346607488335, "tokens_per_sec": 116.2741018871302, "iter": 2530, "memory": 16130, "step": 2530} +{"base_lr": 1.9218178615533173e-06, "lr": 1.9218178615533173e-06, "data_time": 0.008332490921020508, "loss": 2.109375, "time": 1.016385793685913, "tflops": 6.308557808330728, "tokens_per_sec": 104.29110743026793, "iter": 2540, "memory": 16130, "step": 2540} +{"base_lr": 1.920955536296719e-06, "lr": 1.920955536296719e-06, "data_time": 0.008226871490478516, "loss": 0.03076171875, "time": 1.0156888961791992, "tflops": 6.551456224898448, "tokens_per_sec": 108.30087875695774, "iter": 2550, "memory": 16131, "step": 2550} +{"base_lr": 1.9200886771283267e-06, "lr": 1.9200886771283267e-06, "data_time": 0.008177042007446289, "loss": 0.00122833251953125, "time": 1.3182179927825928, "tflops": 4.955994958907008, "tokens_per_sec": 81.92878612735639, "iter": 2560, "memory": 16131, "step": 2560} +{"base_lr": 1.9192172883157347e-06, "lr": 1.9192172883157347e-06, "data_time": 0.008335113525390625, "loss": 1.6328125, "time": 1.0303852558135986, "tflops": 4.4598865651731225, "tokens_per_sec": 73.75881940383178, "iter": 2570, "memory": 16130, "step": 2570} +{"base_lr": 1.918341374148835e-06, "lr": 1.918341374148835e-06, "data_time": 0.008873701095581055, "loss": 2.234375, "time": 1.0407707691192627, "tflops": 7.092180257475193, "tokens_per_sec": 117.22081712875499, "iter": 2580, "memory": 16130, "step": 2580} +{"base_lr": 1.9174609389397977e-06, "lr": 1.9174609389397977e-06, "data_time": 0.007714509963989258, "loss": 2.25, "time": 1.0307807922363281, "tflops": 6.102928782731531, 
"tokens_per_sec": 100.89439072129598, "iter": 2590, "memory": 16130, "step": 2590} +{"base_lr": 1.9165759870230514e-06, "lr": 1.9165759870230514e-06, "data_time": 0.008625507354736328, "loss": 1.8203125, "time": 1.0442044734954834, "tflops": 4.16901768705807, "tokens_per_sec": 68.95201258706585, "iter": 2600, "memory": 16131, "step": 2600} +{"base_lr": 1.9156865227552605e-06, "lr": 1.9156865227552605e-06, "data_time": 0.00863790512084961, "loss": 2.28125, "time": 1.2559030055999756, "tflops": 6.118591435550292, "tokens_per_sec": 101.12245884723228, "iter": 2610, "memory": 16131, "step": 2610} +{"base_lr": 1.9147925505153032e-06, "lr": 1.9147925505153032e-06, "data_time": 0.008528947830200195, "loss": 0.0120849609375, "time": 1.0055243968963623, "tflops": 5.3528146405595365, "tokens_per_sec": 88.51102994081262, "iter": 2620, "memory": 16131, "step": 2620} +{"base_lr": 1.9138940747042515e-06, "lr": 1.9138940747042515e-06, "data_time": 0.00832986831665039, "loss": 0.1650390625, "time": 1.0259222984313965, "tflops": 7.37204287260004, "tokens_per_sec": 121.84158604506334, "iter": 2630, "memory": 16131, "step": 2630} +{"base_lr": 1.9129910997453476e-06, "lr": 1.9129910997453476e-06, "data_time": 0.008359670639038086, "loss": 0.0128173828125, "time": 1.0274548530578613, "tflops": 6.2995510789817635, "tokens_per_sec": 104.14082884658886, "iter": 2640, "memory": 16131, "step": 2640} +{"base_lr": 1.9120836300839846e-06, "lr": 1.9120836300839846e-06, "data_time": 0.008369207382202148, "loss": 1.7421875, "time": 1.0133287906646729, "tflops": 4.176600301368867, "tokens_per_sec": 69.07925704352662, "iter": 2650, "memory": 16131, "step": 2650} +{"base_lr": 1.9111716701876813e-06, "lr": 1.9111716701876813e-06, "data_time": 0.008493185043334961, "loss": 0.027099609375, "time": 1.0382623672485352, "tflops": 6.759138095884927, "tokens_per_sec": 111.72513196957725, "iter": 2660, "memory": 16131, "step": 2660} +{"base_lr": 1.9102552245460656e-06, "lr": 1.9102552245460656e-06, 
"data_time": 0.00812530517578125, "loss": 2.140625, "time": 1.2975053787231445, "tflops": 5.548754712979616, "tokens_per_sec": 91.71445602561924, "iter": 2670, "memory": 16130, "step": 2670} +{"base_lr": 1.9093342976708457e-06, "lr": 1.9093342976708457e-06, "data_time": 0.007243633270263672, "loss": 0.1142578125, "time": 1.0025126934051514, "tflops": 7.121051006666681, "tokens_per_sec": 117.70424531891113, "iter": 2680, "memory": 16131, "step": 2680} +{"base_lr": 1.9084088940957915e-06, "lr": 1.9084088940957915e-06, "data_time": 0.008310794830322266, "loss": 0.00083160400390625, "time": 1.0242853164672852, "tflops": 7.679687093672752, "tokens_per_sec": 126.91776198475375, "iter": 2690, "memory": 16131, "step": 2690} +{"base_lr": 1.907479018376715e-06, "lr": 1.907479018376715e-06, "data_time": 0.008184432983398438, "loss": 0.0108642578125, "time": 1.0436317920684814, "tflops": 6.434092568035671, "tokens_per_sec": 106.3593509161802, "iter": 2700, "memory": 16131, "step": 2700} +{"base_lr": 1.9065446750914408e-06, "lr": 1.9065446750914408e-06, "data_time": 0.008382797241210938, "loss": 0.005767822265625, "time": 1.2310781478881836, "tflops": 6.045055220365987, "tokens_per_sec": 99.9124224655411, "iter": 2710, "memory": 16131, "step": 2710} +{"base_lr": 1.9056058688397899e-06, "lr": 1.9056058688397899e-06, "data_time": 0.00830221176147461, "loss": 1.9921875, "time": 1.0236475467681885, "tflops": 3.8980344746527402, "tokens_per_sec": 64.47531692749872, "iter": 2720, "memory": 16131, "step": 2720} +{"base_lr": 1.9046626042435536e-06, "lr": 1.9046626042435536e-06, "data_time": 0.008572816848754883, "loss": 0.00124359130859375, "time": 1.0770492553710938, "tflops": 6.2344628378209075, "tokens_per_sec": 103.05935354985436, "iter": 2730, "memory": 16130, "step": 2730} +{"base_lr": 1.9037148859464727e-06, "lr": 1.9037148859464727e-06, "data_time": 0.008700370788574219, "loss": 0.08203125, "time": 1.0122675895690918, "tflops": 6.513752686868415, "tokens_per_sec": 
107.67903775946446, "iter": 2740, "memory": 16131, "step": 2740} +{"base_lr": 1.9027627186142135e-06, "lr": 1.9027627186142135e-06, "data_time": 0.008783578872680664, "loss": 0.039306640625, "time": 1.1433308124542236, "tflops": 6.402988482361663, "tokens_per_sec": 105.83113713183403, "iter": 2750, "memory": 16131, "step": 2750} +{"base_lr": 1.901806106934345e-06, "lr": 1.901806106934345e-06, "data_time": 0.008371114730834961, "loss": 0.01611328125, "time": 1.1033267974853516, "tflops": 6.525300664965549, "tokens_per_sec": 107.85562380167971, "iter": 2760, "memory": 16130, "step": 2760} +{"base_lr": 1.900845055616315e-06, "lr": 1.900845055616315e-06, "data_time": 0.00811767578125, "loss": 2.109375, "time": 0.9974806308746338, "tflops": 5.760212884571165, "tokens_per_sec": 95.23994457577055, "iter": 2770, "memory": 16133, "step": 2770} +{"base_lr": 1.899879569391431e-06, "lr": 1.899879569391431e-06, "data_time": 0.008313894271850586, "loss": 0.052001953125, "time": 1.0079996585845947, "tflops": 6.481234067224875, "tokens_per_sec": 107.1428934326659, "iter": 2780, "memory": 16131, "step": 2780} +{"base_lr": 1.8989096530128297e-06, "lr": 1.8989096530128297e-06, "data_time": 0.008834600448608398, "loss": 1.9296875, "time": 1.0034949779510498, "tflops": 7.899197532225983, "tokens_per_sec": 130.5437524633627, "iter": 2790, "memory": 16131, "step": 2790} +{"base_lr": 1.8979353112554606e-06, "lr": 1.8979353112554606e-06, "data_time": 0.008817434310913086, "loss": 0.1357421875, "time": 1.0246613025665283, "tflops": 7.321969176226951, "tokens_per_sec": 121.01559772901447, "iter": 2800, "memory": 16130, "step": 2800} +{"base_lr": 1.8969565489160584e-06, "lr": 1.8969565489160584e-06, "data_time": 0.008960723876953125, "loss": 2.53125, "time": 1.0197510719299316, "tflops": 5.040649548038081, "tokens_per_sec": 83.35367555833969, "iter": 2810, "memory": 16131, "step": 2810} +{"base_lr": 1.8959733708131212e-06, "lr": 1.8959733708131212e-06, "data_time": 0.008727550506591797, 
"loss": 0.00107574462890625, "time": 1.0417559146881104, "tflops": 6.910965403503897, "tokens_per_sec": 114.23021297221334, "iter": 2820, "memory": 16130, "step": 2820} +{"base_lr": 1.8949857817868858e-06, "lr": 1.8949857817868858e-06, "data_time": 0.00870060920715332, "loss": 0.03857421875, "time": 1.2218432426452637, "tflops": 5.743583526149363, "tokens_per_sec": 94.93852889734661, "iter": 2830, "memory": 16131, "step": 2830} +{"base_lr": 1.8939937866993037e-06, "lr": 1.8939937866993037e-06, "data_time": 0.009013891220092773, "loss": 2.34375, "time": 1.0129992961883545, "tflops": 9.86046841783084, "tokens_per_sec": 162.88264031444837, "iter": 2840, "memory": 16131, "step": 2840} +{"base_lr": 1.892997390434018e-06, "lr": 1.892997390434018e-06, "data_time": 0.008423089981079102, "loss": 2.09375, "time": 1.2716872692108154, "tflops": 4.946799356992062, "tokens_per_sec": 81.78111279234446, "iter": 2850, "memory": 16131, "step": 2850} +{"base_lr": 1.8919965978963395e-06, "lr": 1.8919965978963395e-06, "data_time": 0.00863194465637207, "loss": 0.07373046875, "time": 1.0067288875579834, "tflops": 6.489415181893479, "tokens_per_sec": 107.27813747539092, "iter": 2860, "memory": 16131, "step": 2860} +{"base_lr": 1.8909914140132209e-06, "lr": 1.8909914140132209e-06, "data_time": 0.008263587951660156, "loss": 0.1982421875, "time": 1.0197596549987793, "tflops": 6.822360976971093, "tokens_per_sec": 112.77167069335066, "iter": 2870, "memory": 16130, "step": 2870} +{"base_lr": 1.8899818437332347e-06, "lr": 1.8899818437332347e-06, "data_time": 0.008397579193115234, "loss": 2.53125, "time": 1.331874132156372, "tflops": 3.3139979326923883, "tokens_per_sec": 54.80998409493431, "iter": 2880, "memory": 16131, "step": 2880} +{"base_lr": 1.888967892026548e-06, "lr": 1.888967892026548e-06, "data_time": 0.008614301681518555, "loss": 0.058349609375, "time": 1.004462480545044, "tflops": 6.745306579045905, "tokens_per_sec": 111.5024226082738, "iter": 2890, "memory": 16130, "step": 2890} 
+{"base_lr": 1.887949563884898e-06, "lr": 1.887949563884898e-06, "data_time": 0.00799417495727539, "loss": 1.625, "time": 1.007993459701538, "tflops": 3.8385121164315996, "tokens_per_sec": 63.49247545602785, "iter": 2900, "memory": 16130, "step": 2900} +{"base_lr": 1.8869268643215662e-06, "lr": 1.8869268643215662e-06, "data_time": 0.008663177490234375, "loss": 1.6484375, "time": 1.2312133312225342, "tflops": 2.9951589856936653, "tokens_per_sec": 49.54462273356028, "iter": 2910, "memory": 16131, "step": 2910} +{"base_lr": 1.885899798371356e-06, "lr": 1.885899798371356e-06, "data_time": 0.00827646255493164, "loss": 0.005126953125, "time": 1.0147085189819336, "tflops": 7.334062191608708, "tokens_per_sec": 121.21707633171918, "iter": 2920, "memory": 16130, "step": 2920} +{"base_lr": 1.8848683710905655e-06, "lr": 1.8848683710905655e-06, "data_time": 0.008245229721069336, "loss": 0.000904083251953125, "time": 1.2806987762451172, "tflops": 5.952804523251072, "tokens_per_sec": 98.38379042519367, "iter": 2930, "memory": 16131, "step": 2930} +{"base_lr": 1.8838325875569657e-06, "lr": 1.8838325875569657e-06, "data_time": 0.008290767669677734, "loss": 0.263671875, "time": 1.005242109298706, "tflops": 6.137468139938975, "tokens_per_sec": 101.46809316519528, "iter": 2940, "memory": 16131, "step": 2940} +{"base_lr": 1.882792452869772e-06, "lr": 1.882792452869772e-06, "data_time": 0.008353948593139648, "loss": 0.004730224609375, "time": 1.0201773643493652, "tflops": 7.235343734579261, "tokens_per_sec": 119.58704854982534, "iter": 2950, "memory": 16131, "step": 2950} +{"base_lr": 1.881747972149621e-06, "lr": 1.881747972149621e-06, "data_time": 0.008565902709960938, "loss": 0.00092315673828125, "time": 1.041926383972168, "tflops": 6.677216915872976, "tokens_per_sec": 110.37248098226631, "iter": 2960, "memory": 16131, "step": 2960} +{"base_lr": 1.8806991505385454e-06, "lr": 1.8806991505385454e-06, "data_time": 0.00763249397277832, "loss": 1.609375, "time": 0.9828426837921143, 
"tflops": 7.695171661461969, "tokens_per_sec": 127.18210356675165, "iter": 2970, "memory": 16131, "step": 2970} +{"base_lr": 1.8796459931999485e-06, "lr": 1.8796459931999485e-06, "data_time": 0.008499860763549805, "loss": 2.65625, "time": 1.0330471992492676, "tflops": 5.268814643232371, "tokens_per_sec": 87.12089831453719, "iter": 2980, "memory": 16131, "step": 2980} +{"base_lr": 1.8785885053185789e-06, "lr": 1.8785885053185789e-06, "data_time": 0.008675336837768555, "loss": 1.2578125, "time": 1.0012948513031006, "tflops": 3.803763049830437, "tokens_per_sec": 62.91852985955925, "iter": 2990, "memory": 16131, "step": 2990} +{"base_lr": 1.877526692100503e-06, "lr": 1.877526692100503e-06, "data_time": 0.008327245712280273, "loss": 1.4921875, "time": 0.9908673763275146, "tflops": 6.165379746779132, "tokens_per_sec": 101.93089651839966, "iter": 3000, "memory": 16131, "step": 3000} +{"base_lr": 1.8764605587730838e-06, "lr": 1.8764605587730838e-06, "data_time": 0.008341312408447266, "loss": 0.0096435546875, "time": 0.9925103187561035, "tflops": 7.864480454174759, "tokens_per_sec": 129.97345978381722, "iter": 3010, "memory": 16131, "step": 3010} +{"base_lr": 1.8753901105849497e-06, "lr": 1.8753901105849497e-06, "data_time": 0.008634090423583984, "loss": 0.166015625, "time": 1.3163948059082031, "tflops": 5.192965495373965, "tokens_per_sec": 85.8405088600707, "iter": 3020, "memory": 16130, "step": 3020} +{"base_lr": 1.874315352805973e-06, "lr": 1.874315352805973e-06, "data_time": 0.008328914642333984, "loss": 0.036376953125, "time": 1.198322057723999, "tflops": 6.66550443255299, "tokens_per_sec": 110.15402674853578, "iter": 3030, "memory": 16131, "step": 3030} +{"base_lr": 1.873236290727242e-06, "lr": 1.873236290727242e-06, "data_time": 0.008420228958129883, "loss": 0.0004596710205078125, "time": 1.0051333904266357, "tflops": 8.18786924988988, "tokens_per_sec": 135.30542442942678, "iter": 3040, "memory": 16131, "step": 3040} +{"base_lr": 1.8721529296610352e-06, "lr": 
1.8721529296610352e-06, "data_time": 0.008686304092407227, "loss": 1.1953125, "time": 1.2687666416168213, "tflops": 4.719499034934189, "tokens_per_sec": 78.02853318539631, "iter": 3050, "memory": 16131, "step": 3050} +{"base_lr": 1.8710652749407948e-06, "lr": 1.8710652749407948e-06, "data_time": 0.008723020553588867, "loss": 2.0625, "time": 1.007596492767334, "tflops": 5.942798514731178, "tokens_per_sec": 98.25361710817508, "iter": 3060, "memory": 16131, "step": 3060} +{"base_lr": 1.8699733319211011e-06, "lr": 1.8699733319211011e-06, "data_time": 0.007995128631591797, "loss": 0.10498046875, "time": 1.0178377628326416, "tflops": 6.121025390167493, "tokens_per_sec": 101.19490920954817, "iter": 3070, "memory": 16131, "step": 3070} +{"base_lr": 1.868877105977647e-06, "lr": 1.868877105977647e-06, "data_time": 0.0085296630859375, "loss": 0.006134033203125, "time": 0.9983758926391602, "tflops": 6.9077988448962895, "tokens_per_sec": 114.18544942880395, "iter": 3080, "memory": 16130, "step": 3080} +{"base_lr": 1.867776602507209e-06, "lr": 1.867776602507209e-06, "data_time": 0.008931875228881836, "loss": 0.006072998046875, "time": 1.0217390060424805, "tflops": 6.749844918476679, "tokens_per_sec": 111.5744816687058, "iter": 3090, "memory": 16130, "step": 3090} +{"base_lr": 1.866671826927623e-06, "lr": 1.866671826927623e-06, "data_time": 0.008536577224731445, "loss": 1.25, "time": 1.0126936435699463, "tflops": 5.972692267596186, "tokens_per_sec": 98.74654653443008, "iter": 3100, "memory": 16131, "step": 3100} +{"base_lr": 1.8655627846777565e-06, "lr": 1.8655627846777565e-06, "data_time": 0.008403301239013672, "loss": 2.65625, "time": 1.1206505298614502, "tflops": 5.343274090457947, "tokens_per_sec": 88.34154570216583, "iter": 3110, "memory": 16131, "step": 3110} +{"base_lr": 1.8644494812174807e-06, "lr": 1.8644494812174807e-06, "data_time": 0.0074977874755859375, "loss": 1.6015625, "time": 1.0988802909851074, "tflops": 4.95316392077216, "tokens_per_sec": 81.90155082246153, 
"iter": 3120, "memory": 16131, "step": 3120} +{"base_lr": 1.863331922027648e-06, "lr": 1.863331922027648e-06, "data_time": 0.008594989776611328, "loss": 0.0028228759765625, "time": 1.0892095565795898, "tflops": 7.221925916455164, "tokens_per_sec": 119.35260686484932, "iter": 3130, "memory": 16131, "step": 3130} +{"base_lr": 1.862210112610059e-06, "lr": 1.862210112610059e-06, "data_time": 0.008387565612792969, "loss": 0.04150390625, "time": 1.0166444778442383, "tflops": 6.128209934883857, "tokens_per_sec": 101.31368658816388, "iter": 3140, "memory": 16130, "step": 3140} +{"base_lr": 1.8610840584874396e-06, "lr": 1.8610840584874396e-06, "data_time": 0.008687973022460938, "loss": 0.01190185546875, "time": 1.229442834854126, "tflops": 5.905224338905935, "tokens_per_sec": 97.60518878792803, "iter": 3150, "memory": 16132, "step": 3150} +{"base_lr": 1.8599537652034127e-06, "lr": 1.8599537652034127e-06, "data_time": 0.008265256881713867, "loss": 1.3125, "time": 1.0176992416381836, "tflops": 4.51548077402347, "tokens_per_sec": 74.6782515800922, "iter": 3160, "memory": 16131, "step": 3160} +{"base_lr": 1.8588192383224714e-06, "lr": 1.8588192383224714e-06, "data_time": 0.008431196212768555, "loss": 0.37890625, "time": 0.9914085865020752, "tflops": 6.100923047009779, "tokens_per_sec": 100.86658655310104, "iter": 3170, "memory": 16131, "step": 3170} +{"base_lr": 1.8576804834299493e-06, "lr": 1.8576804834299493e-06, "data_time": 0.008219003677368164, "loss": 2.03125, "time": 1.2132413387298584, "tflops": 3.189138866962338, "tokens_per_sec": 52.751252332820115, "iter": 3180, "memory": 16131, "step": 3180} +{"base_lr": 1.8565375061319957e-06, "lr": 1.8565375061319957e-06, "data_time": 0.008380651473999023, "loss": 0.04443359375, "time": 1.0168750286102295, "tflops": 6.603391142656531, "tokens_per_sec": 109.15795636322719, "iter": 3190, "memory": 16131, "step": 3190} +{"base_lr": 1.855390312055548e-06, "lr": 1.855390312055548e-06, "data_time": 0.0091094970703125, "loss": 
0.00640869140625, "time": 1.4615683555603027, "tflops": 4.884440742995117, "tokens_per_sec": 80.73519076340642, "iter": 3200, "memory": 16131, "step": 3200} +{"base_lr": 1.8542389068483006e-06, "lr": 1.8542389068483006e-06, "data_time": 0.008509397506713867, "loss": 1.671875, "time": 1.0110056400299072, "tflops": 4.8447361149145145, "tokens_per_sec": 80.11824740910818, "iter": 3210, "memory": 16131, "step": 3210} +{"base_lr": 1.8530832961786823e-06, "lr": 1.8530832961786823e-06, "data_time": 0.008330821990966797, "loss": 2.09375, "time": 1.0362465381622314, "tflops": 4.55148272122194, "tokens_per_sec": 75.2716628016501, "iter": 3220, "memory": 16131, "step": 3220} +{"base_lr": 1.851923485735823e-06, "lr": 1.851923485735823e-06, "data_time": 0.008130073547363281, "loss": 2.890625, "time": 1.0028879642486572, "tflops": 7.360078258426983, "tokens_per_sec": 121.64868295261495, "iter": 3230, "memory": 16131, "step": 3230} +{"base_lr": 1.850759481229531e-06, "lr": 1.850759481229531e-06, "data_time": 0.008363485336303711, "loss": 0.059326171875, "time": 1.0209097862243652, "tflops": 6.992727585628297, "tokens_per_sec": 115.58318040645322, "iter": 3240, "memory": 16131, "step": 3240} +{"base_lr": 1.84959128839026e-06, "lr": 1.84959128839026e-06, "data_time": 0.00859689712524414, "loss": 2.640625, "time": 1.0034501552581787, "tflops": 4.458966800116076, "tokens_per_sec": 73.74556634642875, "iter": 3250, "memory": 16131, "step": 3250} +{"base_lr": 1.8484189129690838e-06, "lr": 1.8484189129690838e-06, "data_time": 0.008162736892700195, "loss": 0.010986328125, "time": 0.9975576400756836, "tflops": 6.852729637608217, "tokens_per_sec": 113.27666238044505, "iter": 3260, "memory": 16130, "step": 3260} +{"base_lr": 1.8472423607376672e-06, "lr": 1.8472423607376672e-06, "data_time": 0.008424043655395508, "loss": 0.000789642333984375, "time": 1.0169463157653809, "tflops": 7.556305829340649, "tokens_per_sec": 124.88368169591286, "iter": 3270, "memory": 16131, "step": 3270} +{"base_lr": 
1.8460616374882385e-06, "lr": 1.8460616374882385e-06, "data_time": 0.008785724639892578, "loss": 0.01397705078125, "time": 1.0128226280212402, "tflops": 7.048562924108799, "tokens_per_sec": 116.50608579946623, "iter": 3280, "memory": 16131, "step": 3280} +{"base_lr": 1.8448767490335583e-06, "lr": 1.8448767490335583e-06, "data_time": 0.008636713027954102, "loss": 0.0030975341796875, "time": 1.0191948413848877, "tflops": 6.528919762183697, "tokens_per_sec": 107.92833277140949, "iter": 3290, "memory": 16131, "step": 3290} +{"base_lr": 1.843687701206895e-06, "lr": 1.843687701206895e-06, "data_time": 0.008315086364746094, "loss": 2.484375, "time": 1.0143027305603027, "tflops": 4.530601388420754, "tokens_per_sec": 74.92832042159891, "iter": 3300, "memory": 16131, "step": 3300} +{"base_lr": 1.8424944998619918e-06, "lr": 1.8424944998619918e-06, "data_time": 0.007941484451293945, "loss": 0.06396484375, "time": 1.2411119937896729, "tflops": 5.11747076758266, "tokens_per_sec": 84.6015512905513, "iter": 3310, "memory": 16130, "step": 3310} +{"base_lr": 1.8412971508730406e-06, "lr": 1.8412971508730406e-06, "data_time": 0.008179187774658203, "loss": 1.9375, "time": 1.0113885402679443, "tflops": 6.818922267122079, "tokens_per_sec": 112.71632558708404, "iter": 3320, "memory": 16132, "step": 3320} +{"base_lr": 1.8400956601346525e-06, "lr": 1.8400956601346525e-06, "data_time": 0.013824224472045898, "loss": 0.02685546875, "time": 1.069453239440918, "tflops": 5.032838757831746, "tokens_per_sec": 83.22009482756219, "iter": 3330, "memory": 16131, "step": 3330} +{"base_lr": 1.8388900335618296e-06, "lr": 1.8388900335618296e-06, "data_time": 0.013128280639648438, "loss": 1.40625, "time": 1.042494535446167, "tflops": 4.756467319864717, "tokens_per_sec": 78.65748664555538, "iter": 3340, "memory": 16131, "step": 3340} +{"base_lr": 1.8376802770899332e-06, "lr": 1.8376802770899332e-06, "data_time": 0.008023500442504883, "loss": 0.02734375, "time": 1.2347779273986816, "tflops": 
8.040294879954104, "tokens_per_sec": 132.8174049445211, "iter": 3350, "memory": 16131, "step": 3350} +{"base_lr": 1.8364663966746569e-06, "lr": 1.8364663966746569e-06, "data_time": 0.008185625076293945, "loss": 0.0026092529296875, "time": 0.9930317401885986, "tflops": 7.55518496522533, "tokens_per_sec": 124.87012749091464, "iter": 3360, "memory": 16130, "step": 3360} +{"base_lr": 1.8352483982919973e-06, "lr": 1.8352483982919973e-06, "data_time": 0.008586883544921875, "loss": 1.4296875, "time": 1.0328128337860107, "tflops": 5.445894118058487, "tokens_per_sec": 90.04535667803164, "iter": 3370, "memory": 16130, "step": 3370} +{"base_lr": 1.8340262879382225e-06, "lr": 1.8340262879382225e-06, "data_time": 0.00861358642578125, "loss": 0.000843048095703125, "time": 1.2011394500732422, "tflops": 6.498477759555344, "tokens_per_sec": 107.3980210974059, "iter": 3380, "memory": 16130, "step": 3380} +{"base_lr": 1.8328000716298459e-06, "lr": 1.8328000716298459e-06, "data_time": 0.008660554885864258, "loss": 2.03125, "time": 1.298144817352295, "tflops": 4.006294047626734, "tokens_per_sec": 66.24838681352973, "iter": 3390, "memory": 16131, "step": 3390} +{"base_lr": 1.8315697554035939e-06, "lr": 1.8315697554035939e-06, "data_time": 0.008432626724243164, "loss": 0.212890625, "time": 1.0252318382263184, "tflops": 6.254125453347775, "tokens_per_sec": 103.39124873772917, "iter": 3400, "memory": 16131, "step": 3400} +{"base_lr": 1.8303353453163766e-06, "lr": 1.8303353453163766e-06, "data_time": 0.008512258529663086, "loss": 0.00958251953125, "time": 1.0167131423950195, "tflops": 7.259996545256517, "tokens_per_sec": 119.99451459090102, "iter": 3410, "memory": 16131, "step": 3410} +{"base_lr": 1.8290968474452586e-06, "lr": 1.8290968474452586e-06, "data_time": 0.00856161117553711, "loss": 0.04541015625, "time": 1.0247278213500977, "tflops": 7.084940606089481, "tokens_per_sec": 117.10426661567624, "iter": 3420, "memory": 16131, "step": 3420} +{"base_lr": 1.8278542678874304e-06, "lr": 
1.8278542678874304e-06, "data_time": 0.008669614791870117, "loss": 1.9296875, "time": 1.0872721672058105, "tflops": 4.393554063932335, "tokens_per_sec": 72.65890030363792, "iter": 3430, "memory": 16131, "step": 3430} +{"base_lr": 1.8266076127601745e-06, "lr": 1.8266076127601745e-06, "data_time": 0.007964611053466797, "loss": 1.6171875, "time": 1.017859935760498, "tflops": 5.347429463846558, "tokens_per_sec": 88.42081001318489, "iter": 3440, "memory": 16130, "step": 3440} +{"base_lr": 1.825356888200838e-06, "lr": 1.825356888200838e-06, "data_time": 0.008712291717529297, "loss": 0.03125, "time": 1.0141584873199463, "tflops": 6.38214283971542, "tokens_per_sec": 105.50619191942745, "iter": 3450, "memory": 16130, "step": 3450} +{"base_lr": 1.8241021003668036e-06, "lr": 1.8241021003668036e-06, "data_time": 0.00841379165649414, "loss": 2.65625, "time": 1.0410552024841309, "tflops": 4.879344820972211, "tokens_per_sec": 80.68736393563121, "iter": 3460, "memory": 16130, "step": 3460} +{"base_lr": 1.8228432554354567e-06, "lr": 1.8228432554354567e-06, "data_time": 0.008056640625, "loss": 0.06494140625, "time": 1.2178220748901367, "tflops": 5.762548458178777, "tokens_per_sec": 95.25200962576527, "iter": 3470, "memory": 16130, "step": 3470} +{"base_lr": 1.8215803596041563e-06, "lr": 1.8215803596041563e-06, "data_time": 0.008077144622802734, "loss": 0.00103759765625, "time": 1.0097570419311523, "tflops": 7.730154560245664, "tokens_per_sec": 127.75350370734802, "iter": 3480, "memory": 16131, "step": 3480} +{"base_lr": 1.820313419090203e-06, "lr": 1.820313419090203e-06, "data_time": 0.00813150405883789, "loss": 0.000782012939453125, "time": 1.1858720779418945, "tflops": 5.713438662106916, "tokens_per_sec": 94.44526275910287, "iter": 3490, "memory": 16131, "step": 3490} +{"base_lr": 1.8190424401308116e-06, "lr": 1.8190424401308116e-06, "data_time": 0.008548974990844727, "loss": 0.0218505859375, "time": 1.0555801391601562, "tflops": 5.959549192192201, "tokens_per_sec": 
98.52402119146184, "iter": 3500, "memory": 16130, "step": 3500} +{"base_lr": 1.8177674289830764e-06, "lr": 1.8177674289830764e-06, "data_time": 0.008218526840209961, "loss": 1.671875, "time": 1.0319406986236572, "tflops": 5.27446414102496, "tokens_per_sec": 87.2143138844602, "iter": 3510, "memory": 16131, "step": 3510} +{"base_lr": 1.816488391923944e-06, "lr": 1.816488391923944e-06, "data_time": 0.008553504943847656, "loss": 0.017822265625, "time": 1.2586088180541992, "tflops": 4.950077180456208, "tokens_per_sec": 81.8363883380028, "iter": 3520, "memory": 16131, "step": 3520} +{"base_lr": 1.81520533525018e-06, "lr": 1.81520533525018e-06, "data_time": 0.00824117660522461, "loss": 0.138671875, "time": 1.0250351428985596, "tflops": 6.2553255656213995, "tokens_per_sec": 103.41108861902372, "iter": 3530, "memory": 16131, "step": 3530} +{"base_lr": 1.813918265278339e-06, "lr": 1.813918265278339e-06, "data_time": 0.008561849594116211, "loss": 2.109375, "time": 1.213369369506836, "tflops": 3.4381558102164194, "tokens_per_sec": 56.86644292659837, "iter": 3540, "memory": 16131, "step": 3540} +{"base_lr": 1.8126271883447326e-06, "lr": 1.8126271883447326e-06, "data_time": 0.008634567260742188, "loss": 1.78125, "time": 1.053377389907837, "tflops": 4.592391612220437, "tokens_per_sec": 75.94619057365897, "iter": 3550, "memory": 16131, "step": 3550} +{"base_lr": 1.8113321108053993e-06, "lr": 1.8113321108053993e-06, "data_time": 0.00825810432434082, "loss": 1.7578125, "time": 1.0181474685668945, "tflops": 4.156833330276824, "tokens_per_sec": 68.75231944392158, "iter": 3560, "memory": 16131, "step": 3560} +{"base_lr": 1.810033039036073e-06, "lr": 1.810033039036073e-06, "data_time": 0.008419275283813477, "loss": 0.177734375, "time": 0.9802560806274414, "tflops": 5.984985725720556, "tokens_per_sec": 98.95373455660012, "iter": 3570, "memory": 16130, "step": 3570} +{"base_lr": 1.8087299794321523e-06, "lr": 1.8087299794321523e-06, "data_time": 0.008353233337402344, "loss": 1.8984375, 
"time": 1.0852975845336914, "tflops": 6.801207343031782, "tokens_per_sec": 112.41156502924132, "iter": 3580, "memory": 16131, "step": 3580} +{"base_lr": 1.807422938408666e-06, "lr": 1.807422938408666e-06, "data_time": 0.008428096771240234, "loss": 0.037841796875, "time": 1.0642881393432617, "tflops": 5.8538759937836105, "tokens_per_sec": 96.77830297297237, "iter": 3590, "memory": 16130, "step": 3590} +{"base_lr": 1.8061119224002447e-06, "lr": 1.8061119224002447e-06, "data_time": 0.008830070495605469, "loss": 2.015625, "time": 1.0089993476867676, "tflops": 3.2350861866385454, "tokens_per_sec": 53.51836958442729, "iter": 3600, "memory": 16131, "step": 3600} +{"base_lr": 1.804796937861089e-06, "lr": 1.804796937861089e-06, "data_time": 0.008980989456176758, "loss": 1.5, "time": 1.038203477859497, "tflops": 4.36800245568947, "tokens_per_sec": 72.24017410783294, "iter": 3610, "memory": 16131, "step": 3610} +{"base_lr": 1.8034779912649355e-06, "lr": 1.8034779912649355e-06, "data_time": 0.00857090950012207, "loss": 1.265625, "time": 1.0040900707244873, "tflops": 4.27530103925669, "tokens_per_sec": 70.71078787653006, "iter": 3620, "memory": 16130, "step": 3620} +{"base_lr": 1.802155089105026e-06, "lr": 1.802155089105026e-06, "data_time": 0.008715152740478516, "loss": 2.46875, "time": 0.999962568283081, "tflops": 5.382587192626703, "tokens_per_sec": 89.00333154742232, "iter": 3630, "memory": 16131, "step": 3630} +{"base_lr": 1.8008282378940768e-06, "lr": 1.8008282378940768e-06, "data_time": 0.008325576782226562, "loss": 0.046630859375, "time": 1.2189998626708984, "tflops": 6.303821361479754, "tokens_per_sec": 104.18376891497883, "iter": 3640, "memory": 16131, "step": 3640} +{"base_lr": 1.7994974441642447e-06, "lr": 1.7994974441642447e-06, "data_time": 0.00888967514038086, "loss": 0.0162353515625, "time": 0.9924471378326416, "tflops": 6.643840744792655, "tokens_per_sec": 109.82952728135236, "iter": 3650, "memory": 16130, "step": 3650} +{"base_lr": 1.7981627144670964e-06, 
"lr": 1.7981627144670964e-06, "data_time": 0.008179664611816406, "loss": 0.0135498046875, "time": 1.0029585361480713, "tflops": 6.030665552360382, "tokens_per_sec": 99.70501909675839, "iter": 3660, "memory": 16131, "step": 3660} +{"base_lr": 1.7968240553735758e-06, "lr": 1.7968240553735758e-06, "data_time": 0.008284330368041992, "loss": 0.0019989013671875, "time": 1.0049140453338623, "tflops": 7.646780746696979, "tokens_per_sec": 126.3789680217679, "iter": 3670, "memory": 16131, "step": 3670} +{"base_lr": 1.7954814734739709e-06, "lr": 1.7954814734739709e-06, "data_time": 0.008599042892456055, "loss": 0.154296875, "time": 1.0106298923492432, "tflops": 6.404425969570364, "tokens_per_sec": 105.87456477382538, "iter": 3680, "memory": 16130, "step": 3680} +{"base_lr": 1.7941349753778807e-06, "lr": 1.7941349753778807e-06, "data_time": 0.008736133575439453, "loss": 2.046875, "time": 1.0165653228759766, "tflops": 5.235117411161232, "tokens_per_sec": 86.56600615782527, "iter": 3690, "memory": 16131, "step": 3690} +{"base_lr": 1.7927845677141867e-06, "lr": 1.7927845677141867e-06, "data_time": 0.008363485336303711, "loss": 0.056396484375, "time": 1.0058190822601318, "tflops": 7.94121043064313, "tokens_per_sec": 131.23632502900756, "iter": 3700, "memory": 16131, "step": 3700} +{"base_lr": 1.7914302571310143e-06, "lr": 1.7914302571310143e-06, "data_time": 0.012256383895874023, "loss": 1.4609375, "time": 1.068713665008545, "tflops": 4.866364139428825, "tokens_per_sec": 80.4705720678063, "iter": 3710, "memory": 16130, "step": 3710} +{"base_lr": 1.7900720502957052e-06, "lr": 1.7900720502957052e-06, "data_time": 0.008368492126464844, "loss": 1.2265625, "time": 1.028611421585083, "tflops": 4.408735160462617, "tokens_per_sec": 72.91383162395047, "iter": 3720, "memory": 16131, "step": 3720} +{"base_lr": 1.7887099538947824e-06, "lr": 1.7887099538947824e-06, "data_time": 0.008503437042236328, "loss": 1.53125, "time": 1.2025041580200195, "tflops": 4.274586283592834, "tokens_per_sec": 
70.68582626764956, "iter": 3730, "memory": 16131, "step": 3730} +{"base_lr": 1.7873439746339172e-06, "lr": 1.7873439746339172e-06, "data_time": 0.00839376449584961, "loss": 1.90625, "time": 1.0198919773101807, "tflops": 4.209060781316668, "tokens_per_sec": 69.61521570861134, "iter": 3740, "memory": 16131, "step": 3740} +{"base_lr": 1.7859741192378953e-06, "lr": 1.7859741192378953e-06, "data_time": 0.008376598358154297, "loss": 1.8671875, "time": 1.2108867168426514, "tflops": 3.2453105730677523, "tokens_per_sec": 53.67967052230266, "iter": 3750, "memory": 16131, "step": 3750} +{"base_lr": 1.7846003944505861e-06, "lr": 1.7846003944505861e-06, "data_time": 0.008391141891479492, "loss": 2.28125, "time": 1.2429194450378418, "tflops": 4.866371282984458, "tokens_per_sec": 80.45573701429616, "iter": 3760, "memory": 16131, "step": 3760} +{"base_lr": 1.783222807034908e-06, "lr": 1.783222807034908e-06, "data_time": 0.008590221405029297, "loss": 0.059814453125, "time": 1.288163185119629, "tflops": 6.012411296050763, "tokens_per_sec": 99.36629262387555, "iter": 3770, "memory": 16131, "step": 3770} +{"base_lr": 1.7818413637727946e-06, "lr": 1.7818413637727946e-06, "data_time": 0.00818943977355957, "loss": 1.5078125, "time": 1.025787115097046, "tflops": 5.424162519081928, "tokens_per_sec": 89.68722520091951, "iter": 3780, "memory": 16131, "step": 3780} +{"base_lr": 1.7804560714651637e-06, "lr": 1.7804560714651637e-06, "data_time": 0.008545398712158203, "loss": 0.003936767578125, "time": 1.2770657539367676, "tflops": 5.068262388396494, "tokens_per_sec": 83.78581891345134, "iter": 3790, "memory": 16131, "step": 3790} +{"base_lr": 1.7790669369318802e-06, "lr": 1.7790669369318802e-06, "data_time": 0.010355710983276367, "loss": 1.546875, "time": 1.0789480209350586, "tflops": 4.764092134281436, "tokens_per_sec": 78.7804401608308, "iter": 3800, "memory": 16131, "step": 3800} +{"base_lr": 1.7776739670117251e-06, "lr": 1.7776739670117251e-06, "data_time": 0.013310670852661133, "loss": 
1.96875, "time": 1.0619721412658691, "tflops": 5.41040631746302, "tokens_per_sec": 89.4562072849394, "iter": 3810, "memory": 16130, "step": 3810} +{"base_lr": 1.7762771685623626e-06, "lr": 1.7762771685623626e-06, "data_time": 0.008238792419433594, "loss": 2.125, "time": 1.2221908569335938, "tflops": 3.413340006629794, "tokens_per_sec": 56.45599425695308, "iter": 3820, "memory": 16131, "step": 3820} +{"base_lr": 1.7748765484603033e-06, "lr": 1.7748765484603033e-06, "data_time": 0.008069753646850586, "loss": 1.359375, "time": 1.0180847644805908, "tflops": 4.929971081073727, "tokens_per_sec": 81.52562821452655, "iter": 3830, "memory": 16131, "step": 3830} +{"base_lr": 1.7734721136008723e-06, "lr": 1.7734721136008723e-06, "data_time": 0.00828242301940918, "loss": 0.045166015625, "time": 1.2725598812103271, "tflops": 4.515073016926142, "tokens_per_sec": 74.65267560499485, "iter": 3840, "memory": 16130, "step": 3840} +{"base_lr": 1.7720638708981748e-06, "lr": 1.7720638708981748e-06, "data_time": 0.00921940803527832, "loss": 2.09375, "time": 1.2450933456420898, "tflops": 6.17171186468998, "tokens_per_sec": 102.0003845048297, "iter": 3850, "memory": 16131, "step": 3850} +{"base_lr": 1.7706518272850633e-06, "lr": 1.7706518272850633e-06, "data_time": 0.009045600891113281, "loss": 0.005767822265625, "time": 1.2468154430389404, "tflops": 5.43417024247987, "tokens_per_sec": 89.82885207687646, "iter": 3860, "memory": 16131, "step": 3860} +{"base_lr": 1.769235989713101e-06, "lr": 1.769235989713101e-06, "data_time": 0.008755207061767578, "loss": 2.0625, "time": 1.2190227508544922, "tflops": 3.174013861215187, "tokens_per_sec": 52.50107100551301, "iter": 3870, "memory": 16130, "step": 3870} +{"base_lr": 1.7678163651525297e-06, "lr": 1.7678163651525297e-06, "data_time": 0.008665323257446289, "loss": 0.000949859619140625, "time": 1.036799430847168, "tflops": 7.470072470082399, "tokens_per_sec": 123.45685789515512, "iter": 3880, "memory": 16131, "step": 3880} +{"base_lr": 
1.766392960592235e-06, "lr": 1.766392960592235e-06, "data_time": 0.008764982223510742, "loss": 0.0012969970703125, "time": 1.22867751121521, "tflops": 5.859583999535822, "tokens_per_sec": 96.85210229184345, "iter": 3890, "memory": 16131, "step": 3890} +{"base_lr": 1.764965783039711e-06, "lr": 1.764965783039711e-06, "data_time": 0.010089635848999023, "loss": 1.765625, "time": 1.1814873218536377, "tflops": 6.606569413016812, "tokens_per_sec": 109.18441325083579, "iter": 3900, "memory": 16131, "step": 3900} +{"base_lr": 1.7635348395210265e-06, "lr": 1.7635348395210265e-06, "data_time": 0.008782386779785156, "loss": 0.014892578125, "time": 1.0095648765563965, "tflops": 6.651205596630366, "tokens_per_sec": 109.94835753251301, "iter": 3910, "memory": 16130, "step": 3910} +{"base_lr": 1.7621001370807906e-06, "lr": 1.7621001370807906e-06, "data_time": 0.00845026969909668, "loss": 0.047607421875, "time": 1.0184364318847656, "tflops": 6.176901737483813, "tokens_per_sec": 102.11732096762353, "iter": 3920, "memory": 16131, "step": 3920} +{"base_lr": 1.760661682782118e-06, "lr": 1.760661682782118e-06, "data_time": 0.00840306282043457, "loss": 2.546875, "time": 1.016709327697754, "tflops": 3.865119909792302, "tokens_per_sec": 63.93174354672507, "iter": 3930, "memory": 16130, "step": 3930} +{"base_lr": 1.7592194837065941e-06, "lr": 1.7592194837065941e-06, "data_time": 0.00843667984008789, "loss": 0.05615234375, "time": 1.2945873737335205, "tflops": 5.701688469291128, "tokens_per_sec": 94.23852145882151, "iter": 3940, "memory": 16131, "step": 3940} +{"base_lr": 1.7577735469542384e-06, "lr": 1.7577735469542384e-06, "data_time": 0.008453369140625, "loss": 2.609375, "time": 1.0267462730407715, "tflops": 5.890946627428299, "tokens_per_sec": 97.39504551961656, "iter": 3950, "memory": 16130, "step": 3950} +{"base_lr": 1.7563238796434729e-06, "lr": 1.7563238796434729e-06, "data_time": 0.0085906982421875, "loss": 2.203125, "time": 1.2473464012145996, "tflops": 3.9753120577889285, 
"tokens_per_sec": 65.7395571271036, "iter": 3960, "memory": 16131, "step": 3960} +{"base_lr": 1.754870488911085e-06, "lr": 1.754870488911085e-06, "data_time": 0.00869894027709961, "loss": 1.1875, "time": 1.0104587078094482, "tflops": 4.188463417422427, "tokens_per_sec": 69.2754681204957, "iter": 3970, "memory": 16130, "step": 3970} +{"base_lr": 1.7534133819121928e-06, "lr": 1.7534133819121928e-06, "data_time": 0.008410930633544922, "loss": 1.921875, "time": 1.0384047031402588, "tflops": 4.367156010639814, "tokens_per_sec": 72.2261751830658, "iter": 3980, "memory": 16131, "step": 3980} +{"base_lr": 1.7519525658202086e-06, "lr": 1.7519525658202086e-06, "data_time": 0.008136272430419922, "loss": 0.00933837890625, "time": 1.0213565826416016, "tflops": 6.633733500695404, "tokens_per_sec": 109.65807819069163, "iter": 3990, "memory": 16131, "step": 3990} +{"base_lr": 1.7504880478268064e-06, "lr": 1.7504880478268064e-06, "data_time": 0.008300065994262695, "loss": 0.1357421875, "time": 1.3025836944580078, "tflops": 5.34105294404861, "tokens_per_sec": 88.28607366205522, "iter": 4000, "memory": 16131, "step": 4000} +{"base_lr": 1.7490198351418848e-06, "lr": 1.7490198351418848e-06, "data_time": 0.008118867874145508, "loss": 0.007354736328125, "time": 1.0028412342071533, "tflops": 7.783463360075445, "tokens_per_sec": 128.6345192036891, "iter": 4010, "memory": 16131, "step": 4010} +{"base_lr": 1.7475479349935296e-06, "lr": 1.7475479349935296e-06, "data_time": 0.008174419403076172, "loss": 0.007598876953125, "time": 1.0125718116760254, "tflops": 6.451968790397083, "tokens_per_sec": 106.65910185780302, "iter": 4020, "memory": 16130, "step": 4020} +{"base_lr": 1.7460723546279822e-06, "lr": 1.7460723546279822e-06, "data_time": 0.0084075927734375, "loss": 0.000919342041015625, "time": 1.2563660144805908, "tflops": 5.441083829537292, "tokens_per_sec": 89.94194263255896, "iter": 4030, "memory": 16131, "step": 4030} +{"base_lr": 1.7445931013096008e-06, "lr": 1.7445931013096008e-06, 
"data_time": 0.008261442184448242, "loss": 0.0380859375, "time": 1.0216615200042725, "tflops": 5.268267041244954, "tokens_per_sec": 87.11300000761572, "iter": 4040, "memory": 16131, "step": 4040} +{"base_lr": 1.743110182320826e-06, "lr": 1.743110182320826e-06, "data_time": 0.00817728042602539, "loss": 2.359375, "time": 1.01456880569458, "tflops": 4.469756329331382, "tokens_per_sec": 73.92302974324211, "iter": 4050, "memory": 16131, "step": 4050} +{"base_lr": 1.741623604962145e-06, "lr": 1.741623604962145e-06, "data_time": 0.008122920989990234, "loss": 2.4375, "time": 1.0132648944854736, "tflops": 6.686708890537258, "tokens_per_sec": 110.53378105708676, "iter": 4060, "memory": 16130, "step": 4060} +{"base_lr": 1.7401333765520548e-06, "lr": 1.7401333765520548e-06, "data_time": 0.00830984115600586, "loss": 0.00823974609375, "time": 1.012542486190796, "tflops": 6.631646225902739, "tokens_per_sec": 109.62502958021493, "iter": 4070, "memory": 16130, "step": 4070} +{"base_lr": 1.7386395044270271e-06, "lr": 1.7386395044270271e-06, "data_time": 0.00837850570678711, "loss": 0.037353515625, "time": 1.2075471878051758, "tflops": 4.758158389253974, "tokens_per_sec": 78.67187382763257, "iter": 4080, "memory": 16130, "step": 4080} +{"base_lr": 1.7371419959414723e-06, "lr": 1.7371419959414723e-06, "data_time": 0.008246898651123047, "loss": 1.5625, "time": 0.9799697399139404, "tflops": 4.751094644979677, "tokens_per_sec": 78.57385474645723, "iter": 4090, "memory": 16131, "step": 4090} +{"base_lr": 1.7356408584677017e-06, "lr": 1.7356408584677017e-06, "data_time": 0.008313655853271484, "loss": 0.0712890625, "time": 1.0601894855499268, "tflops": 6.333607009534381, "tokens_per_sec": 104.69826527502195, "iter": 4100, "memory": 16131, "step": 4100} +{"base_lr": 1.7341360993958933e-06, "lr": 1.7341360993958933e-06, "data_time": 0.008582592010498047, "loss": 1.53125, "time": 1.0190861225128174, "tflops": 3.6779847445927514, "tokens_per_sec": 60.83882277492142, "iter": 4110, "memory": 
16131, "step": 4110} +{"base_lr": 1.7326277261340533e-06, "lr": 1.7326277261340533e-06, "data_time": 0.008386850357055664, "loss": 0.0093994140625, "time": 1.0051615238189697, "tflops": 5.595706961803082, "tokens_per_sec": 92.52244320551294, "iter": 4120, "memory": 16131, "step": 4120} +{"base_lr": 1.7311157461079808e-06, "lr": 1.7311157461079808e-06, "data_time": 0.008998394012451172, "loss": 0.08056640625, "time": 1.0351719856262207, "tflops": 6.545199708362195, "tokens_per_sec": 108.19458172656992, "iter": 4130, "memory": 16131, "step": 4130} +{"base_lr": 1.7296001667612328e-06, "lr": 1.7296001667612328e-06, "data_time": 0.008642911911010742, "loss": 0.00070953369140625, "time": 1.0226366519927979, "tflops": 7.692068057392204, "tokens_per_sec": 127.12237503569197, "iter": 4140, "memory": 16131, "step": 4140} +{"base_lr": 1.728080995555086e-06, "lr": 1.728080995555086e-06, "data_time": 0.008293867111206055, "loss": 0.007415771484375, "time": 1.006922721862793, "tflops": 6.307687978044408, "tokens_per_sec": 104.27811163665785, "iter": 4150, "memory": 16131, "step": 4150} +{"base_lr": 1.726558239968497e-06, "lr": 1.726558239968497e-06, "data_time": 0.008562088012695312, "loss": 0.00168609619140625, "time": 1.2811756134033203, "tflops": 5.430300423587924, "tokens_per_sec": 89.76130890785826, "iter": 4160, "memory": 16131, "step": 4160} +{"base_lr": 1.7250319074980702e-06, "lr": 1.7250319074980702e-06, "data_time": 0.008398294448852539, "loss": 2.0625, "time": 1.0332698822021484, "tflops": 7.495589505485346, "tokens_per_sec": 123.87857442150263, "iter": 4170, "memory": 16131, "step": 4170} +{"base_lr": 1.723502005658021e-06, "lr": 1.723502005658021e-06, "data_time": 0.008303403854370117, "loss": 0.0150146484375, "time": 1.172471284866333, "tflops": 6.8124661904157335, "tokens_per_sec": 112.58271456510425, "iter": 4180, "memory": 16130, "step": 4180} +{"base_lr": 1.721968541980134e-06, "lr": 1.721968541980134e-06, "data_time": 0.007853031158447266, "loss": 
0.07568359375, "time": 1.00337553024292, "tflops": 7.054537763893103, "tokens_per_sec": 116.6063915985248, "iter": 4190, "memory": 16131, "step": 4190} +{"base_lr": 1.7204315240137311e-06, "lr": 1.7204315240137311e-06, "data_time": 0.008785009384155273, "loss": 0.005615234375, "time": 0.9935007095336914, "tflops": 6.636795191097708, "tokens_per_sec": 109.71305702544534, "iter": 4200, "memory": 16130, "step": 4200} +{"base_lr": 1.7188909593256311e-06, "lr": 1.7188909593256311e-06, "data_time": 0.008547306060791016, "loss": 1.984375, "time": 1.0219590663909912, "tflops": 5.3259806479100815, "tokens_per_sec": 88.06614957460422, "iter": 4210, "memory": 16131, "step": 4210} +{"base_lr": 1.7173468555001131e-06, "lr": 1.7173468555001131e-06, "data_time": 0.008774042129516602, "loss": 1.8671875, "time": 1.0235540866851807, "tflops": 7.507524491285634, "tokens_per_sec": 124.0774685499721, "iter": 4220, "memory": 16131, "step": 4220} +{"base_lr": 1.7157992201388809e-06, "lr": 1.7157992201388809e-06, "data_time": 0.008774757385253906, "loss": 0.0027008056640625, "time": 1.2350270748138428, "tflops": 5.976657558230957, "tokens_per_sec": 98.78325948302827, "iter": 4230, "memory": 16131, "step": 4230} +{"base_lr": 1.714248060861024e-06, "lr": 1.714248060861024e-06, "data_time": 0.012374162673950195, "loss": 2.0, "time": 1.0690772533416748, "tflops": 5.544388840042112, "tokens_per_sec": 91.66783756139625, "iter": 4240, "memory": 16131, "step": 4240} +{"base_lr": 1.7126933853029786e-06, "lr": 1.7126933853029786e-06, "data_time": 0.008256196975708008, "loss": 1.734375, "time": 0.9947643280029297, "tflops": 3.7679107568654127, "tokens_per_sec": 62.32632016912761, "iter": 4250, "memory": 16130, "step": 4250} +{"base_lr": 1.7111352011184943e-06, "lr": 1.7111352011184943e-06, "data_time": 0.007447957992553711, "loss": 0.0308837890625, "time": 0.9993367195129395, "tflops": 6.840529995436285, "tokens_per_sec": 113.07500044125398, "iter": 4260, "memory": 16130, "step": 4260} +{"base_lr": 
1.709573515978592e-06, "lr": 1.709573515978592e-06, "data_time": 0.008538961410522461, "loss": 1.421875, "time": 1.0132074356079102, "tflops": 5.551271278616086, "tokens_per_sec": 91.78771960364614, "iter": 4270, "memory": 16131, "step": 4270} +{"base_lr": 1.7080083375715281e-06, "lr": 1.7080083375715281e-06, "data_time": 0.00844120979309082, "loss": 0.0026702880859375, "time": 0.9968059062957764, "tflops": 7.22260877451954, "tokens_per_sec": 119.3813151068654, "iter": 4280, "memory": 16130, "step": 4280} +{"base_lr": 1.7064396736027568e-06, "lr": 1.7064396736027568e-06, "data_time": 0.008430242538452148, "loss": 0.0164794921875, "time": 1.2332122325897217, "tflops": 6.083736663648504, "tokens_per_sec": 100.55041356466425, "iter": 4290, "memory": 16131, "step": 4290} +{"base_lr": 1.704867531794893e-06, "lr": 1.704867531794893e-06, "data_time": 0.008435249328613281, "loss": 0.00927734375, "time": 1.008124828338623, "tflops": 6.840997904307778, "tokens_per_sec": 113.08123438221183, "iter": 4300, "memory": 16131, "step": 4300} +{"base_lr": 1.7032919198876713e-06, "lr": 1.7032919198876713e-06, "data_time": 0.008733987808227539, "loss": 0.01806640625, "time": 1.0132105350494385, "tflops": 7.285093912830812, "tokens_per_sec": 120.40932834746603, "iter": 4310, "memory": 16131, "step": 4310} +{"base_lr": 1.7017128456379116e-06, "lr": 1.7017128456379116e-06, "data_time": 0.008687257766723633, "loss": 1.1796875, "time": 0.9960479736328125, "tflops": 4.978265425210311, "tokens_per_sec": 82.32535196155774, "iter": 4320, "memory": 16131, "step": 4320} +{"base_lr": 1.7001303168194767e-06, "lr": 1.7001303168194767e-06, "data_time": 0.008561372756958008, "loss": 1.4765625, "time": 1.017251968383789, "tflops": 4.934007112264881, "tokens_per_sec": 81.59237099515165, "iter": 4330, "memory": 16131, "step": 4330} +{"base_lr": 1.6985443412232395e-06, "lr": 1.6985443412232395e-06, "data_time": 0.008636474609375, "loss": 1.4453125, "time": 1.0084996223449707, "tflops": 5.457101015861364, 
"tokens_per_sec": 90.23305312531093, "iter": 4340, "memory": 16131, "step": 4340} +{"base_lr": 1.6969549266570392e-06, "lr": 1.6969549266570392e-06, "data_time": 0.00833439826965332, "loss": 1.296875, "time": 1.2023060321807861, "tflops": 4.1242338109156815, "tokens_per_sec": 68.20226947643042, "iter": 4350, "memory": 16131, "step": 4350} +{"base_lr": 1.6953620809456464e-06, "lr": 1.6953620809456464e-06, "data_time": 0.008415460586547852, "loss": 2.140625, "time": 1.2666497230529785, "tflops": 8.89201298458739, "tokens_per_sec": 146.84406952818918, "iter": 4360, "memory": 16132, "step": 4360} +{"base_lr": 1.693765811930723e-06, "lr": 1.693765811930723e-06, "data_time": 0.008328914642333984, "loss": 0.06591796875, "time": 1.0083966255187988, "tflops": 5.757911883418931, "tokens_per_sec": 95.20063591100855, "iter": 4370, "memory": 16131, "step": 4370} +{"base_lr": 1.6921661274707838e-06, "lr": 1.6921661274707838e-06, "data_time": 0.008327722549438477, "loss": 1.0234375, "time": 1.0175495147705078, "tflops": 4.218750302135372, "tokens_per_sec": 69.77547428337495, "iter": 4380, "memory": 16131, "step": 4380} +{"base_lr": 1.6905630354411579e-06, "lr": 1.6905630354411579e-06, "data_time": 0.00875234603881836, "loss": 0.109375, "time": 1.287663459777832, "tflops": 4.603205866845117, "tokens_per_sec": 76.10684240184341, "iter": 4390, "memory": 16130, "step": 4390} +{"base_lr": 1.6889565437339514e-06, "lr": 1.6889565437339514e-06, "data_time": 0.008693218231201172, "loss": 1.0390625, "time": 1.0156259536743164, "tflops": 4.286328941435953, "tokens_per_sec": 70.89224112425306, "iter": 4400, "memory": 16130, "step": 4400} +{"base_lr": 1.6873466602580052e-06, "lr": 1.6873466602580052e-06, "data_time": 0.008686065673828125, "loss": 0.1240234375, "time": 1.2553789615631104, "tflops": 5.204071381629861, "tokens_per_sec": 86.02979921333069, "iter": 4410, "memory": 16130, "step": 4410} +{"base_lr": 1.6857333929388583e-06, "lr": 1.6857333929388583e-06, "data_time": 
0.008541345596313477, "loss": 0.00433349609375, "time": 1.0613017082214355, "tflops": 6.954981645770577, "tokens_per_sec": 114.9531740642693, "iter": 4420, "memory": 16130, "step": 4420} +{"base_lr": 1.684116749718711e-06, "lr": 1.684116749718711e-06, "data_time": 0.008734703063964844, "loss": 2.40625, "time": 1.2403910160064697, "tflops": 4.583348462818731, "tokens_per_sec": 75.78255468389648, "iter": 4430, "memory": 16131, "step": 4430} +{"base_lr": 1.6824967385563808e-06, "lr": 1.6824967385563808e-06, "data_time": 0.008772134780883789, "loss": 2.453125, "time": 1.2300212383270264, "tflops": 3.0964411334499946, "tokens_per_sec": 51.21862780648909, "iter": 4440, "memory": 16131, "step": 4440} +{"base_lr": 1.6808733674272668e-06, "lr": 1.6808733674272668e-06, "data_time": 0.008307218551635742, "loss": 0.11865234375, "time": 1.039696216583252, "tflops": 7.332670203616353, "tokens_per_sec": 121.1892454643645, "iter": 4450, "memory": 16131, "step": 4450} +{"base_lr": 1.6792466443233088e-06, "lr": 1.6792466443233088e-06, "data_time": 0.008426666259765625, "loss": 1.640625, "time": 1.0170352458953857, "tflops": 6.125855337378077, "tokens_per_sec": 101.27475956766743, "iter": 4460, "memory": 16131, "step": 4460} +{"base_lr": 1.6776165772529487e-06, "lr": 1.6776165772529487e-06, "data_time": 0.008149862289428711, "loss": 0.043212890625, "time": 1.0083367824554443, "tflops": 7.440508572140753, "tokens_per_sec": 122.97478596180859, "iter": 4470, "memory": 16130, "step": 4470} +{"base_lr": 1.6759831742410923e-06, "lr": 1.6759831742410923e-06, "data_time": 0.008821725845336914, "loss": 0.00537109375, "time": 1.2789373397827148, "tflops": 5.629313384914961, "tokens_per_sec": 93.04599709327782, "iter": 4480, "memory": 16131, "step": 4480} +{"base_lr": 1.674346443329066e-06, "lr": 1.674346443329066e-06, "data_time": 0.008309125900268555, "loss": 2.265625, "time": 1.2379164695739746, "tflops": 6.305415748375948, "tokens_per_sec": 104.20735418787244, "iter": 4490, "memory": 16131, 
"step": 4490} +{"base_lr": 1.6727063925745806e-06, "lr": 1.6727063925745806e-06, "data_time": 0.008338212966918945, "loss": 0.00091552734375, "time": 1.0152435302734375, "tflops": 7.62867888779592, "tokens_per_sec": 126.07812429535939, "iter": 4500, "memory": 16131, "step": 4500} +{"base_lr": 1.6710630300516916e-06, "lr": 1.6710630300516916e-06, "data_time": 0.008545875549316406, "loss": 1.921875, "time": 1.0253896713256836, "tflops": 6.784902043296945, "tokens_per_sec": 112.15248526075861, "iter": 4510, "memory": 16130, "step": 4510} +{"base_lr": 1.6694163638507574e-06, "lr": 1.6694163638507574e-06, "data_time": 0.008337259292602539, "loss": 2.40625, "time": 1.2872488498687744, "tflops": 3.569940155568908, "tokens_per_sec": 59.04064315745055, "iter": 4520, "memory": 16131, "step": 4520} +{"base_lr": 1.6677664020784008e-06, "lr": 1.6677664020784008e-06, "data_time": 0.008243322372436523, "loss": 0.048095703125, "time": 1.2138235569000244, "tflops": 5.082815689047526, "tokens_per_sec": 84.031982589309, "iter": 4530, "memory": 16131, "step": 4530} +{"base_lr": 1.66611315285747e-06, "lr": 1.66611315285747e-06, "data_time": 0.008100509643554688, "loss": 1.8671875, "time": 1.0246777534484863, "tflops": 4.307528006455923, "tokens_per_sec": 71.24190971673973, "iter": 4540, "memory": 16130, "step": 4540} +{"base_lr": 1.6644566243269957e-06, "lr": 1.6644566243269957e-06, "data_time": 0.008301734924316406, "loss": 1.2421875, "time": 1.0426013469696045, "tflops": 4.523740763504861, "tokens_per_sec": 74.81287092774028, "iter": 4550, "memory": 16130, "step": 4550} +{"base_lr": 1.6627968246421536e-06, "lr": 1.6627968246421536e-06, "data_time": 0.008340120315551758, "loss": 0.0693359375, "time": 1.2300207614898682, "tflops": 5.360609298496749, "tokens_per_sec": 88.61639039969101, "iter": 4560, "memory": 16131, "step": 4560} +{"base_lr": 1.6611337619742239e-06, "lr": 1.6611337619742239e-06, "data_time": 0.008516788482666016, "loss": 0.054931640625, "time": 1.0092377662658691, 
"tflops": 7.01356093287565, "tokens_per_sec": 115.92907430800811, "iter": 4570, "memory": 16130, "step": 4570} +{"base_lr": 1.6594674445105502e-06, "lr": 1.6594674445105502e-06, "data_time": 0.008989334106445312, "loss": 1.09375, "time": 1.0415737628936768, "tflops": 4.179547406045982, "tokens_per_sec": 69.12616519823051, "iter": 4580, "memory": 16131, "step": 4580} +{"base_lr": 1.6577978804544996e-06, "lr": 1.6577978804544996e-06, "data_time": 0.00735163688659668, "loss": 0.00433349609375, "time": 1.0020225048065186, "tflops": 6.640810270741606, "tokens_per_sec": 109.77797352079453, "iter": 4590, "memory": 16131, "step": 4590} +{"base_lr": 1.6561250780254222e-06, "lr": 1.6561250780254222e-06, "data_time": 0.008406639099121094, "loss": 0.0179443359375, "time": 1.017164945602417, "tflops": 8.448648052429629, "tokens_per_sec": 139.60370991330294, "iter": 4600, "memory": 16131, "step": 4600} +{"base_lr": 1.6544490454586117e-06, "lr": 1.6544490454586117e-06, "data_time": 0.008746623992919922, "loss": 0.005523681640625, "time": 1.0270187854766846, "tflops": 7.423184050731904, "tokens_per_sec": 122.68519503408612, "iter": 4610, "memory": 16130, "step": 4610} +{"base_lr": 1.652769791005262e-06, "lr": 1.652769791005262e-06, "data_time": 0.008371591567993164, "loss": 0.08203125, "time": 1.293555498123169, "tflops": 5.425170184175864, "tokens_per_sec": 89.67531750142591, "iter": 4620, "memory": 16131, "step": 4620} +{"base_lr": 1.6510873229324305e-06, "lr": 1.6510873229324305e-06, "data_time": 0.008459806442260742, "loss": 0.001190185546875, "time": 1.0060455799102783, "tflops": 6.13256649792755, "tokens_per_sec": 101.38705644827267, "iter": 4630, "memory": 16131, "step": 4630} +{"base_lr": 1.6494016495229948e-06, "lr": 1.6494016495229948e-06, "data_time": 0.00842142105102539, "loss": 0.09716796875, "time": 1.0086803436279297, "tflops": 6.95736638899119, "tokens_per_sec": 115.00174533258648, "iter": 4640, "memory": 16131, "step": 4640} +{"base_lr": 1.6477127790756119e-06, 
"lr": 1.6477127790756119e-06, "data_time": 0.008763551712036133, "loss": 0.109375, "time": 0.9991722106933594, "tflops": 7.2661505934344195, "tokens_per_sec": 120.09941701301705, "iter": 4650, "memory": 16131, "step": 4650} +{"base_lr": 1.646020719904678e-06, "lr": 1.646020719904678e-06, "data_time": 0.008440971374511719, "loss": 1.9140625, "time": 1.0095245838165283, "tflops": 7.071590072210034, "tokens_per_sec": 116.88670280200775, "iter": 4660, "memory": 16131, "step": 4660} +{"base_lr": 1.6443254803402893e-06, "lr": 1.6443254803402893e-06, "data_time": 0.00832366943359375, "loss": 0.01055908203125, "time": 1.2294292449951172, "tflops": 5.560297864394457, "tokens_per_sec": 91.91256874677393, "iter": 4670, "memory": 16130, "step": 4670} +{"base_lr": 1.6426270687281973e-06, "lr": 1.6426270687281973e-06, "data_time": 0.008811712265014648, "loss": 1.9921875, "time": 1.0287632942199707, "tflops": 8.235525421011172, "tokens_per_sec": 136.0857262175307, "iter": 4680, "memory": 16131, "step": 4680} +{"base_lr": 1.6409254934297698e-06, "lr": 1.6409254934297698e-06, "data_time": 0.008209228515625, "loss": 2.09375, "time": 1.212629795074463, "tflops": 8.737559573604768, "tokens_per_sec": 144.31444840847706, "iter": 4690, "memory": 16132, "step": 4690} +{"base_lr": 1.6392207628219509e-06, "lr": 1.6392207628219509e-06, "data_time": 0.008422374725341797, "loss": 0.01141357421875, "time": 0.9878237247467041, "tflops": 6.674936596684859, "tokens_per_sec": 110.34357372600991, "iter": 4700, "memory": 16131, "step": 4700} +{"base_lr": 1.6375128852972175e-06, "lr": 1.6375128852972175e-06, "data_time": 0.008663654327392578, "loss": 2.390625, "time": 1.3465375900268555, "tflops": 3.5026561815317994, "tokens_per_sec": 57.926344260754306, "iter": 4710, "memory": 16131, "step": 4710} +{"base_lr": 1.6358018692635395e-06, "lr": 1.6358018692635395e-06, "data_time": 0.00868678092956543, "loss": 2.234375, "time": 1.0201289653778076, "tflops": 7.710976741258418, "tokens_per_sec": 
127.43486795488323, "iter": 4720, "memory": 16131, "step": 4720} +{"base_lr": 1.6340877231443357e-06, "lr": 1.6340877231443357e-06, "data_time": 0.008123397827148438, "loss": 0.00191497802734375, "time": 1.012152910232544, "tflops": 7.412455566363847, "tokens_per_sec": 122.51113319566335, "iter": 4730, "memory": 16131, "step": 4730} +{"base_lr": 1.6323704553784375e-06, "lr": 1.6323704553784375e-06, "data_time": 0.008501768112182617, "loss": 0.031005859375, "time": 1.0169177055358887, "tflops": 6.305258036248644, "tokens_per_sec": 104.23655662877515, "iter": 4740, "memory": 16130, "step": 4740} +{"base_lr": 1.630650074420043e-06, "lr": 1.630650074420043e-06, "data_time": 0.008292913436889648, "loss": 0.255859375, "time": 1.2293994426727295, "tflops": 5.609714465912837, "tokens_per_sec": 92.72820211474138, "iter": 4750, "memory": 16131, "step": 4750} +{"base_lr": 1.6289265887386772e-06, "lr": 1.6289265887386772e-06, "data_time": 0.008468389511108398, "loss": 0.00457763671875, "time": 1.0070765018463135, "tflops": 7.088780257985168, "tokens_per_sec": 117.17084033193974, "iter": 4760, "memory": 16131, "step": 4760} +{"base_lr": 1.6272000068191487e-06, "lr": 1.6272000068191487e-06, "data_time": 0.00832676887512207, "loss": 0.00335693359375, "time": 1.0331947803497314, "tflops": 6.440451953484638, "tokens_per_sec": 106.4658882255087, "iter": 4770, "memory": 16131, "step": 4770} +{"base_lr": 1.62547033716151e-06, "lr": 1.62547033716151e-06, "data_time": 0.008851289749145508, "loss": 1.703125, "time": 1.0168507099151611, "tflops": 6.782293379645297, "tokens_per_sec": 112.11085254530553, "iter": 4780, "memory": 16132, "step": 4780} +{"base_lr": 1.6237375882810138e-06, "lr": 1.6237375882810138e-06, "data_time": 0.008695363998413086, "loss": 1.2890625, "time": 1.0356526374816895, "tflops": 4.43720335664844, "tokens_per_sec": 73.38367831972069, "iter": 4790, "memory": 16131, "step": 4790} +{"base_lr": 1.6220017687080731e-06, "lr": 1.6220017687080731e-06, "data_time": 
0.008603096008300781, "loss": 0.004791259765625, "time": 1.2789020538330078, "tflops": 5.913778264196642, "tokens_per_sec": 97.74008855898208, "iter": 4800, "memory": 16131, "step": 4800} +{"base_lr": 1.620262886988217e-06, "lr": 1.620262886988217e-06, "data_time": 0.008403778076171875, "loss": 0.09521484375, "time": 1.0276727676391602, "tflops": 6.592961876366865, "tokens_per_sec": 108.98410810007648, "iter": 4810, "memory": 16131, "step": 4810} +{"base_lr": 1.6185209516820514e-06, "lr": 1.6185209516820514e-06, "data_time": 0.008565425872802734, "loss": 1.1796875, "time": 1.0440027713775635, "tflops": 3.937957140810341, "tokens_per_sec": 65.13392671382351, "iter": 4820, "memory": 16131, "step": 4820} +{"base_lr": 1.6167759713652132e-06, "lr": 1.6167759713652132e-06, "data_time": 0.008951902389526367, "loss": 1.6953125, "time": 1.0057437419891357, "tflops": 6.134406967866085, "tokens_per_sec": 101.41748413782365, "iter": 4830, "memory": 16130, "step": 4830} +{"base_lr": 1.6150279546283311e-06, "lr": 1.6150279546283311e-06, "data_time": 0.00864100456237793, "loss": 0.1943359375, "time": 1.0163516998291016, "tflops": 6.964469652242676, "tokens_per_sec": 115.11763105188716, "iter": 4840, "memory": 16131, "step": 4840} +{"base_lr": 1.6132769100769813e-06, "lr": 1.6132769100769813e-06, "data_time": 0.008035898208618164, "loss": 0.0003986358642578125, "time": 1.2628862857818604, "tflops": 5.029236930530772, "tokens_per_sec": 83.14287769378281, "iter": 4850, "memory": 16131, "step": 4850} +{"base_lr": 1.6115228463316475e-06, "lr": 1.6115228463316475e-06, "data_time": 0.008839130401611328, "loss": 1.7890625, "time": 1.2269055843353271, "tflops": 3.7948551567401516, "tokens_per_sec": 62.75951546968611, "iter": 4860, "memory": 16130, "step": 4860} +{"base_lr": 1.609765772027676e-06, "lr": 1.609765772027676e-06, "data_time": 0.008536100387573242, "loss": 0.003997802734375, "time": 1.0040104389190674, "tflops": 7.110428087037405, "tokens_per_sec": 117.52865849375333, "iter": 
4870, "memory": 16132, "step": 4870} +{"base_lr": 1.608005695815235e-06, "lr": 1.608005695815235e-06, "data_time": 0.008675098419189453, "loss": 1.8515625, "time": 1.0337257385253906, "tflops": 4.621137764879252, "tokens_per_sec": 76.4225916562908, "iter": 4880, "memory": 16130, "step": 4880} +{"base_lr": 1.6062426263592699e-06, "lr": 1.6062426263592699e-06, "data_time": 0.008306026458740234, "loss": 0.0272216796875, "time": 1.0329716205596924, "tflops": 7.321733741237498, "tokens_per_sec": 121.01010087009995, "iter": 4890, "memory": 16131, "step": 4890} +{"base_lr": 1.6044765723394647e-06, "lr": 1.6044765723394647e-06, "data_time": 0.008721113204956055, "loss": 0.0024261474609375, "time": 1.0012671947479248, "tflops": 8.280032328144847, "tokens_per_sec": 136.82661403318397, "iter": 4900, "memory": 16131, "step": 4900} +{"base_lr": 1.6027075424501925e-06, "lr": 1.6027075424501925e-06, "data_time": 0.009385824203491211, "loss": 1.5, "time": 1.0118558406829834, "tflops": 3.883659417633683, "tokens_per_sec": 64.23839976657347, "iter": 4910, "memory": 16131, "step": 4910} +{"base_lr": 1.600935545400481e-06, "lr": 1.600935545400481e-06, "data_time": 0.008954524993896484, "loss": 0.0556640625, "time": 1.2796037197113037, "tflops": 5.200236009735958, "tokens_per_sec": 85.96411397173146, "iter": 4920, "memory": 16131, "step": 4920} +{"base_lr": 1.5991605899139623e-06, "lr": 1.5991605899139623e-06, "data_time": 0.008689165115356445, "loss": 1.8671875, "time": 0.9901068210601807, "tflops": 6.108944374221466, "tokens_per_sec": 100.99920318983521, "iter": 4930, "memory": 16130, "step": 4930} +{"base_lr": 1.5973826847288346e-06, "lr": 1.5973826847288346e-06, "data_time": 0.008162736892700195, "loss": 1.9609375, "time": 1.2480368614196777, "tflops": 7.566030606733128, "tokens_per_sec": 124.99630805969987, "iter": 4940, "memory": 16131, "step": 4940} +{"base_lr": 1.5956018385978173e-06, "lr": 1.5956018385978173e-06, "data_time": 0.00853729248046875, "loss": 0.07177734375, "time": 
1.2269411087036133, "tflops": 5.324690550040771, "tokens_per_sec": 88.02378470636201, "iter": 4950, "memory": 16131, "step": 4950} +{"base_lr": 1.593818060288108e-06, "lr": 1.593818060288108e-06, "data_time": 0.009150266647338867, "loss": 1.8125, "time": 1.0135385990142822, "tflops": 6.445814430080223, "tokens_per_sec": 106.5573625956909, "iter": 4960, "memory": 16131, "step": 4960} +{"base_lr": 1.59203135858134e-06, "lr": 1.59203135858134e-06, "data_time": 0.008867979049682617, "loss": 0.039794921875, "time": 1.014521837234497, "tflops": 7.215945241384357, "tokens_per_sec": 119.26800938037643, "iter": 4970, "memory": 16131, "step": 4970} +{"base_lr": 1.5902417422735373e-06, "lr": 1.5902417422735373e-06, "data_time": 0.00845789909362793, "loss": 0.05908203125, "time": 1.0384385585784912, "tflops": 6.057923902823229, "tokens_per_sec": 100.1503643530576, "iter": 4980, "memory": 16130, "step": 4980} +{"base_lr": 1.5884492201750744e-06, "lr": 1.5884492201750744e-06, "data_time": 0.008820056915283203, "loss": 0.000579833984375, "time": 1.0358636379241943, "tflops": 9.174316790988941, "tokens_per_sec": 151.5643509935985, "iter": 4990, "memory": 16131, "step": 4990} +{"base_lr": 1.5866538011106306e-06, "lr": 1.5866538011106306e-06, "data_time": 0.008489847183227539, "loss": 1.671875, "time": 1.03572678565979, "tflops": 3.4436469725135788, "tokens_per_sec": 56.96482973775579, "iter": 5000, "memory": 16130, "step": 5000} +{"base_lr": 1.5848554939191467e-06, "lr": 1.5848554939191467e-06, "data_time": 0.008719921112060547, "loss": 0.03955078125, "time": 1.0356907844543457, "tflops": 6.36643757997664, "tokens_per_sec": 105.24376738306256, "iter": 5010, "memory": 16130, "step": 5010} +{"base_lr": 1.583054307453782e-06, "lr": 1.583054307453782e-06, "data_time": 0.009276866912841797, "loss": 2.078125, "time": 1.0138933658599854, "tflops": 4.233963321413607, "tokens_per_sec": 70.027088045603, "iter": 5020, "memory": 16130, "step": 5020} +{"base_lr": 1.5812502505818703e-06, "lr": 
1.5812502505818703e-06, "data_time": 0.00861668586730957, "loss": 0.0037384033203125, "time": 1.010505199432373, "tflops": 6.704970327942119, "tokens_per_sec": 110.83564939873882, "iter": 5030, "memory": 16131, "step": 5030} +{"base_lr": 1.5794433321848778e-06, "lr": 1.5794433321848778e-06, "data_time": 0.008245229721069336, "loss": 0.053955078125, "time": 1.0455687046051025, "tflops": 6.4221734330214835, "tokens_per_sec": 106.16232057348834, "iter": 5040, "memory": 16130, "step": 5040} +{"base_lr": 1.5776335611583575e-06, "lr": 1.5776335611583575e-06, "data_time": 0.008903026580810547, "loss": 0.05224609375, "time": 1.2639577388763428, "tflops": 7.566707772580133, "tokens_per_sec": 125.00417944379757, "iter": 5050, "memory": 16131, "step": 5050} +{"base_lr": 1.5758209464119047e-06, "lr": 1.5758209464119047e-06, "data_time": 0.008663654327392578, "loss": 0.00183868408203125, "time": 1.000227451324463, "tflops": 7.5614233122452434, "tokens_per_sec": 124.97157504960977, "iter": 5060, "memory": 16131, "step": 5060} +{"base_lr": 1.5740054968691165e-06, "lr": 1.5740054968691165e-06, "data_time": 0.008421659469604492, "loss": 0.00040435791015625, "time": 1.110774278640747, "tflops": 7.791233915821733, "tokens_per_sec": 128.73902713597235, "iter": 5070, "memory": 16131, "step": 5070} +{"base_lr": 1.572187221467545e-06, "lr": 1.572187221467545e-06, "data_time": 0.008634567260742188, "loss": 0.000926971435546875, "time": 1.0197668075561523, "tflops": 7.594841122480775, "tokens_per_sec": 125.51889221284179, "iter": 5080, "memory": 16130, "step": 5080} +{"base_lr": 1.570366129158654e-06, "lr": 1.570366129158654e-06, "data_time": 0.008478164672851562, "loss": 0.03515625, "time": 1.0162253379821777, "tflops": 9.590374455002351, "tokens_per_sec": 158.42942896850417, "iter": 5090, "memory": 16131, "step": 5090} +{"base_lr": 1.568542228907775e-06, "lr": 1.568542228907775e-06, "data_time": 0.008062124252319336, "loss": 1.7578125, "time": 1.0181171894073486, "tflops": 
4.394730758324735, "tokens_per_sec": 72.68318497107695, "iter": 5100, "memory": 16130, "step": 5100} +{"base_lr": 1.5667155296940637e-06, "lr": 1.5667155296940637e-06, "data_time": 0.008692741394042969, "loss": 1.8671875, "time": 1.2327461242675781, "tflops": 3.8259769148934883, "tokens_per_sec": 63.27336867214206, "iter": 5110, "memory": 16130, "step": 5110} +{"base_lr": 1.564886040510455e-06, "lr": 1.564886040510455e-06, "data_time": 0.008649587631225586, "loss": 0.048095703125, "time": 1.296377420425415, "tflops": 5.039490524932998, "tokens_per_sec": 83.30907210993828, "iter": 5120, "memory": 16130, "step": 5120} +{"base_lr": 1.5630537703636189e-06, "lr": 1.5630537703636189e-06, "data_time": 0.008417129516601562, "loss": 0.00946044921875, "time": 1.008056640625, "tflops": 6.540962546804272, "tokens_per_sec": 108.12884475649238, "iter": 5130, "memory": 16130, "step": 5130} +{"base_lr": 1.5612187282739156e-06, "lr": 1.5612187282739156e-06, "data_time": 0.008835554122924805, "loss": 0.00058746337890625, "time": 1.2506170272827148, "tflops": 6.338315311967782, "tokens_per_sec": 104.74829395576536, "iter": 5140, "memory": 16130, "step": 5140} +{"base_lr": 1.559380923275353e-06, "lr": 1.559380923275353e-06, "data_time": 0.008625984191894531, "loss": 0.08544921875, "time": 1.0358006954193115, "tflops": 7.009201464895669, "tokens_per_sec": 115.85240339243632, "iter": 5150, "memory": 16131, "step": 5150} +{"base_lr": 1.5575403644155398e-06, "lr": 1.5575403644155398e-06, "data_time": 0.009721755981445312, "loss": 1.2578125, "time": 1.0018398761749268, "tflops": 4.224496781496445, "tokens_per_sec": 69.87144519261254, "iter": 5160, "memory": 16132, "step": 5160} +{"base_lr": 1.5556970607556427e-06, "lr": 1.5556970607556427e-06, "data_time": 0.00914907455444336, "loss": 2.34375, "time": 1.2245135307312012, "tflops": 4.346084128767276, "tokens_per_sec": 71.86527367107178, "iter": 5170, "memory": 16131, "step": 5170} +{"base_lr": 1.5538510213703412e-06, "lr": 
1.5538510213703412e-06, "data_time": 0.009752511978149414, "loss": 0.01251220703125, "time": 1.022573471069336, "tflops": 7.573995516693074, "tokens_per_sec": 125.17437975974612, "iter": 5180, "memory": 16132, "step": 5180} +{"base_lr": 1.5520022553477826e-06, "lr": 1.5520022553477826e-06, "data_time": 0.010068893432617188, "loss": 0.0038299560546875, "time": 1.0443034172058105, "tflops": 8.6355530180996, "tokens_per_sec": 142.6788398323248, "iter": 5190, "memory": 16131, "step": 5190} +{"base_lr": 1.5501507717895379e-06, "lr": 1.5501507717895379e-06, "data_time": 0.008951902389526367, "loss": 1.65625, "time": 1.2280352115631104, "tflops": 3.4463745767431937, "tokens_per_sec": 57.001622869464114, "iter": 5200, "memory": 16131, "step": 5200} +{"base_lr": 1.5482965798105568e-06, "lr": 1.5482965798105568e-06, "data_time": 0.008887290954589844, "loss": 0.0004520416259765625, "time": 1.06292724609375, "tflops": 5.975342499550375, "tokens_per_sec": 98.7838070627839, "iter": 5210, "memory": 16131, "step": 5210} +{"base_lr": 1.546439688539122e-06, "lr": 1.546439688539122e-06, "data_time": 0.008635997772216797, "loss": 2.0, "time": 1.0285232067108154, "tflops": 7.648043985578293, "tokens_per_sec": 126.39481457653199, "iter": 5220, "memory": 16131, "step": 5220} +{"base_lr": 1.5445801071168056e-06, "lr": 1.5445801071168056e-06, "data_time": 0.008353710174560547, "loss": 0.138671875, "time": 1.2354676723480225, "tflops": 6.268854344565989, "tokens_per_sec": 103.6044915336641, "iter": 5230, "memory": 16131, "step": 5230} +{"base_lr": 1.542717844698423e-06, "lr": 1.542717844698423e-06, "data_time": 0.008089065551757812, "loss": 2.3125, "time": 1.0465097427368164, "tflops": 4.50684596689667, "tokens_per_sec": 74.5334675967192, "iter": 5240, "memory": 16131, "step": 5240} +{"base_lr": 1.5408529104519886e-06, "lr": 1.5408529104519886e-06, "data_time": 0.008582592010498047, "loss": 1.328125, "time": 1.0175118446350098, "tflops": 5.051742450898725, "tokens_per_sec": 
83.53711108926372, "iter": 5250, "memory": 16131, "step": 5250} +{"base_lr": 1.5389853135586707e-06, "lr": 1.5389853135586707e-06, "data_time": 0.009012699127197266, "loss": 0.0024261474609375, "time": 1.0402629375457764, "tflops": 7.794815502429055, "tokens_per_sec": 128.8135866072558, "iter": 5260, "memory": 16131, "step": 5260} +{"base_lr": 1.5371150632127452e-06, "lr": 1.5371150632127452e-06, "data_time": 0.008728981018066406, "loss": 1.4453125, "time": 1.0374343395233154, "tflops": 3.9045603569757863, "tokens_per_sec": 64.58240049266236, "iter": 5270, "memory": 16131, "step": 5270} +{"base_lr": 1.535242168621552e-06, "lr": 1.535242168621552e-06, "data_time": 0.008498430252075195, "loss": 2.3125, "time": 1.0380001068115234, "tflops": 6.6441032064340755, "tokens_per_sec": 109.82657829397498, "iter": 5280, "memory": 16131, "step": 5280} +{"base_lr": 1.533366639005446e-06, "lr": 1.533366639005446e-06, "data_time": 0.008565187454223633, "loss": 2.0625, "time": 1.0385355949401855, "tflops": 8.333178250925632, "tokens_per_sec": 137.6938842506389, "iter": 5290, "memory": 16131, "step": 5290} +{"base_lr": 1.5314884835977587e-06, "lr": 1.5314884835977587e-06, "data_time": 0.008499383926391602, "loss": 1.796875, "time": 1.140002727508545, "tflops": 4.668268498797466, "tokens_per_sec": 77.19279776833972, "iter": 5300, "memory": 16131, "step": 5300} +{"base_lr": 1.529607711644746e-06, "lr": 1.529607711644746e-06, "data_time": 0.008867740631103516, "loss": 0.0004138946533203125, "time": 1.0630199909210205, "tflops": 6.772722194121119, "tokens_per_sec": 111.94521365189398, "iter": 5310, "memory": 16131, "step": 5310} +{"base_lr": 1.5277243324055451e-06, "lr": 1.5277243324055451e-06, "data_time": 0.00824284553527832, "loss": 0.018798828125, "time": 1.3036584854125977, "tflops": 5.754987642405251, "tokens_per_sec": 95.1169354454513, "iter": 5320, "memory": 16131, "step": 5320} +{"base_lr": 1.5258383551521298e-06, "lr": 1.5258383551521298e-06, "data_time": 0.008987665176391602, 
"loss": 0.00022125244140625, "time": 1.0526700019836426, "tflops": 7.818120364410332, "tokens_per_sec": 129.19528412854316, "iter": 5330, "memory": 16131, "step": 5330} +{"base_lr": 1.5239497891692645e-06, "lr": 1.5239497891692645e-06, "data_time": 0.008301019668579102, "loss": 1.140625, "time": 1.0332872867584229, "tflops": 5.677819446043328, "tokens_per_sec": 93.87515093136359, "iter": 5340, "memory": 16131, "step": 5340} +{"base_lr": 1.5220586437544563e-06, "lr": 1.5220586437544563e-06, "data_time": 0.008466720581054688, "loss": 0.064453125, "time": 1.2318553924560547, "tflops": 5.4509836123919015, "tokens_per_sec": 90.10797913430387, "iter": 5350, "memory": 16131, "step": 5350} +{"base_lr": 1.5201649282179125e-06, "lr": 1.5201649282179125e-06, "data_time": 0.008908748626708984, "loss": 1.984375, "time": 1.0335254669189453, "tflops": 5.3835499950026655, "tokens_per_sec": 89.01570686416972, "iter": 5360, "memory": 16131, "step": 5360} +{"base_lr": 1.518268651882493e-06, "lr": 1.518268651882493e-06, "data_time": 0.008708477020263672, "loss": 2.34375, "time": 1.0282855033874512, "tflops": 5.999930367997534, "tokens_per_sec": 99.19424096117775, "iter": 5370, "memory": 16131, "step": 5370} +{"base_lr": 1.5163698240836636e-06, "lr": 1.5163698240836636e-06, "data_time": 0.008763790130615234, "loss": 1.078125, "time": 1.1359577178955078, "tflops": 4.8981013418132475, "tokens_per_sec": 80.98892991401087, "iter": 5380, "memory": 16132, "step": 5380} +{"base_lr": 1.514468454169452e-06, "lr": 1.514468454169452e-06, "data_time": 0.008684158325195312, "loss": 0.1513671875, "time": 1.039954423904419, "tflops": 6.6898782448881, "tokens_per_sec": 110.5817691203542, "iter": 5390, "memory": 16130, "step": 5390} +{"base_lr": 1.512564551500401e-06, "lr": 1.512564551500401e-06, "data_time": 0.008595466613769531, "loss": 0.000804901123046875, "time": 1.0064537525177002, "tflops": 6.792157899226457, "tokens_per_sec": 112.27540233936423, "iter": 5400, "memory": 16131, "step": 5400} 
+{"base_lr": 1.5106581254495223e-06, "lr": 1.5106581254495223e-06, "data_time": 0.0073549747467041016, "loss": 0.984375, "time": 1.225006341934204, "tflops": 3.899562708618757, "tokens_per_sec": 64.48946205062067, "iter": 5410, "memory": 16131, "step": 5410} +{"base_lr": 1.5087491854022497e-06, "lr": 1.5087491854022497e-06, "data_time": 0.008458137512207031, "loss": 0.00153350830078125, "time": 1.2500548362731934, "tflops": 5.8563303074763935, "tokens_per_sec": 96.795753665209, "iter": 5420, "memory": 16131, "step": 5420} +{"base_lr": 1.5068377407563947e-06, "lr": 1.5068377407563947e-06, "data_time": 0.012373208999633789, "loss": 0.0186767578125, "time": 1.2593166828155518, "tflops": 5.428334984085824, "tokens_per_sec": 89.73120227968982, "iter": 5430, "memory": 16130, "step": 5430} +{"base_lr": 1.504923800922097e-06, "lr": 1.504923800922097e-06, "data_time": 0.009055137634277344, "loss": 0.00799560546875, "time": 1.6278412342071533, "tflops": 4.45997779090024, "tokens_per_sec": 73.71726276387928, "iter": 5440, "memory": 16131, "step": 5440} +{"base_lr": 1.5030073753217813e-06, "lr": 1.5030073753217813e-06, "data_time": 0.008271217346191406, "loss": 2.25, "time": 1.2191014289855957, "tflops": 3.9184508649152963, "tokens_per_sec": 64.80182708478198, "iter": 5450, "memory": 16131, "step": 5450} +{"base_lr": 1.5010884733901115e-06, "lr": 1.5010884733901115e-06, "data_time": 0.008915424346923828, "loss": 1.3671875, "time": 1.02726411819458, "tflops": 5.0627192779353285, "tokens_per_sec": 83.7175157554043, "iter": 5460, "memory": 16131, "step": 5460} +{"base_lr": 1.499167104573941e-06, "lr": 1.499167104573941e-06, "data_time": 0.009148597717285156, "loss": 1.265625, "time": 1.0350439548492432, "tflops": 4.439812761405763, "tokens_per_sec": 73.42683336670099, "iter": 5470, "memory": 16131, "step": 5470} +{"base_lr": 1.4972432783322672e-06, "lr": 1.4972432783322672e-06, "data_time": 0.009204864501953125, "loss": 0.004852294921875, "time": 1.04787015914917, "tflops": 
6.986298788789983, "tokens_per_sec": 115.4723215881363, "iter": 5480, "memory": 16131, "step": 5480} +{"base_lr": 1.4953170041361865e-06, "lr": 1.4953170041361865e-06, "data_time": 0.008821249008178711, "loss": 0.012939453125, "time": 1.0244581699371338, "tflops": 6.5545121842559, "tokens_per_sec": 108.34995830693916, "iter": 5490, "memory": 16131, "step": 5490} +{"base_lr": 1.4933882914688467e-06, "lr": 1.4933882914688467e-06, "data_time": 0.00851893424987793, "loss": 0.08349609375, "time": 1.040855884552002, "tflops": 6.975159442748294, "tokens_per_sec": 115.2897358614966, "iter": 5500, "memory": 16131, "step": 5500} +{"base_lr": 1.4914571498254e-06, "lr": 1.4914571498254e-06, "data_time": 0.008973836898803711, "loss": 0.0001888275146484375, "time": 1.0045757293701172, "tflops": 8.494169624963801, "tokens_per_sec": 140.35776087111776, "iter": 5510, "memory": 16131, "step": 5510} +{"base_lr": 1.4895235887129566e-06, "lr": 1.4895235887129566e-06, "data_time": 0.008168697357177734, "loss": 0.000659942626953125, "time": 1.0220816135406494, "tflops": 8.467329925432946, "tokens_per_sec": 139.91054931952633, "iter": 5520, "memory": 16131, "step": 5520} +{"base_lr": 1.4875876176505379e-06, "lr": 1.4875876176505379e-06, "data_time": 0.008634567260742188, "loss": 2.53125, "time": 1.0240187644958496, "tflops": 6.971497273376344, "tokens_per_sec": 115.2322634028871, "iter": 5530, "memory": 16131, "step": 5530} +{"base_lr": 1.4856492461690302e-06, "lr": 1.4856492461690302e-06, "data_time": 0.008304119110107422, "loss": 0.1640625, "time": 1.030782699584961, "tflops": 6.984536205549308, "tokens_per_sec": 115.44625268526455, "iter": 5540, "memory": 16131, "step": 5540} +{"base_lr": 1.4837084838111368e-06, "lr": 1.4837084838111368e-06, "data_time": 0.008736848831176758, "loss": 0.01190185546875, "time": 1.2763559818267822, "tflops": 5.0710808115798285, "tokens_per_sec": 83.832411586909, "iter": 5550, "memory": 16131, "step": 5550} +{"base_lr": 1.4817653401313303e-06, "lr": 
1.4817653401313303e-06, "data_time": 0.008872270584106445, "loss": 0.0174560546875, "time": 1.0205974578857422, "tflops": 7.113613399266052, "tokens_per_sec": 117.57818821974436, "iter": 5560, "memory": 16131, "step": 5560} +{"base_lr": 1.4798198246958083e-06, "lr": 1.4798198246958083e-06, "data_time": 0.008346319198608398, "loss": 0.021240234375, "time": 1.0240068435668945, "tflops": 5.847561447742397, "tokens_per_sec": 96.67904137735971, "iter": 5570, "memory": 16131, "step": 5570} +{"base_lr": 1.477871947082444e-06, "lr": 1.477871947082444e-06, "data_time": 0.008388519287109375, "loss": 0.09228515625, "time": 1.0045831203460693, "tflops": 5.900338033281082, "tokens_per_sec": 97.55290330395196, "iter": 5580, "memory": 16130, "step": 5580} +{"base_lr": 1.47592171688074e-06, "lr": 1.47592171688074e-06, "data_time": 0.00835418701171875, "loss": 1.7890625, "time": 1.0130183696746826, "tflops": 7.346298554248856, "tokens_per_sec": 121.41931842694855, "iter": 5590, "memory": 16131, "step": 5590} +{"base_lr": 1.4739691436917794e-06, "lr": 1.4739691436917794e-06, "data_time": 0.00833749771118164, "loss": 0.02099609375, "time": 1.2255518436431885, "tflops": 5.281297859212511, "tokens_per_sec": 87.30760804196957, "iter": 5600, "memory": 16131, "step": 5600} +{"base_lr": 1.4720142371281825e-06, "lr": 1.4720142371281825e-06, "data_time": 0.008711576461791992, "loss": 0.0546875, "time": 1.0496411323547363, "tflops": 6.801318855096585, "tokens_per_sec": 112.4193749297625, "iter": 5610, "memory": 16131, "step": 5610} +{"base_lr": 1.4700570068140546e-06, "lr": 1.4700570068140546e-06, "data_time": 0.008648395538330078, "loss": 0.00067901611328125, "time": 1.0051770210266113, "tflops": 8.4890884569531, "tokens_per_sec": 140.27379958990014, "iter": 5620, "memory": 16131, "step": 5620} +{"base_lr": 1.4680974623849429e-06, "lr": 1.4680974623849429e-06, "data_time": 0.008279085159301758, "loss": 2.40625, "time": 1.0310735702514648, "tflops": 7.041336293686889, "tokens_per_sec": 
116.38354765568982, "iter": 5630, "memory": 16131, "step": 5630} +{"base_lr": 1.466135613487785e-06, "lr": 1.466135613487785e-06, "data_time": 0.008427143096923828, "loss": 0.00885009765625, "time": 1.2960395812988281, "tflops": 5.555030254624951, "tokens_per_sec": 91.81818342357425, "iter": 5640, "memory": 16130, "step": 5640} +{"base_lr": 1.4641714697808653e-06, "lr": 1.4641714697808653e-06, "data_time": 0.008579015731811523, "loss": 1.3515625, "time": 1.0333483219146729, "tflops": 3.802883675927318, "tokens_per_sec": 62.902313403383424, "iter": 5650, "memory": 16131, "step": 5650} +{"base_lr": 1.462205040933765e-06, "lr": 1.462205040933765e-06, "data_time": 0.008147954940795898, "loss": 0.0269775390625, "time": 1.0213727951049805, "tflops": 7.226875374672664, "tokens_per_sec": 119.44708199060749, "iter": 5660, "memory": 16130, "step": 5660} +{"base_lr": 1.4602363366273146e-06, "lr": 1.4602363366273146e-06, "data_time": 0.008388042449951172, "loss": 0.000972747802734375, "time": 1.3172636032104492, "tflops": 5.00557421864299, "tokens_per_sec": 82.74729502451997, "iter": 5670, "memory": 16130, "step": 5670} +{"base_lr": 1.4582653665535475e-06, "lr": 1.4582653665535475e-06, "data_time": 0.008216619491577148, "loss": 0.0155029296875, "time": 1.0094718933105469, "tflops": 7.252043441907548, "tokens_per_sec": 119.86465477811628, "iter": 5680, "memory": 16130, "step": 5680} +{"base_lr": 1.4562921404156516e-06, "lr": 1.4562921404156516e-06, "data_time": 0.00832819938659668, "loss": 0.007354736328125, "time": 1.2972602844238281, "tflops": 5.5965143147035885, "tokens_per_sec": 92.50263917021472, "iter": 5690, "memory": 16131, "step": 5690} +{"base_lr": 1.4543166679279218e-06, "lr": 1.4543166679279218e-06, "data_time": 0.008437156677246094, "loss": 0.010498046875, "time": 1.3422937393188477, "tflops": 4.867102881882151, "tokens_per_sec": 80.45928907835372, "iter": 5700, "memory": 16131, "step": 5700} +{"base_lr": 1.4523389588157109e-06, "lr": 1.4523389588157109e-06, 
"data_time": 0.008423089981079102, "loss": 0.06396484375, "time": 1.2344603538513184, "tflops": 7.1088344100274306, "tokens_per_sec": 117.46023235781188, "iter": 5710, "memory": 16130, "step": 5710} +{"base_lr": 1.4503590228153828e-06, "lr": 1.4503590228153828e-06, "data_time": 0.008768796920776367, "loss": 0.0029449462890625, "time": 1.0437438488006592, "tflops": 7.420371285797883, "tokens_per_sec": 122.63545327424833, "iter": 5720, "memory": 16131, "step": 5720} +{"base_lr": 1.4483768696742662e-06, "lr": 1.4483768696742662e-06, "data_time": 0.00807332992553711, "loss": 1.4453125, "time": 1.0345616340637207, "tflops": 5.84644480835408, "tokens_per_sec": 96.65929675654696, "iter": 5730, "memory": 16130, "step": 5730} +{"base_lr": 1.4463925091506027e-06, "lr": 1.4463925091506027e-06, "data_time": 0.008626699447631836, "loss": 1.7890625, "time": 1.0517644882202148, "tflops": 4.023970556029876, "tokens_per_sec": 66.55482361682198, "iter": 5740, "memory": 16131, "step": 5740} +{"base_lr": 1.4444059510135027e-06, "lr": 1.4444059510135027e-06, "data_time": 0.0088348388671875, "loss": 2.125, "time": 1.045423984527588, "tflops": 5.727765030541304, "tokens_per_sec": 94.69842041613573, "iter": 5750, "memory": 16131, "step": 5750} +{"base_lr": 1.4424172050428942e-06, "lr": 1.4424172050428942e-06, "data_time": 0.00853729248046875, "loss": 2.140625, "time": 1.2557094097137451, "tflops": 3.2258458555249567, "tokens_per_sec": 53.356293646967366, "iter": 5760, "memory": 16131, "step": 5760} +{"base_lr": 1.4404262810294765e-06, "lr": 1.4404262810294765e-06, "data_time": 0.008437395095825195, "loss": 0.048828125, "time": 1.3287339210510254, "tflops": 5.19033926107014, "tokens_per_sec": 85.79595823800484, "iter": 5770, "memory": 16130, "step": 5770} +{"base_lr": 1.4384331887746717e-06, "lr": 1.4384331887746717e-06, "data_time": 0.008423566818237305, "loss": 0.0703125, "time": 1.2526895999908447, "tflops": 4.683377789797404, "tokens_per_sec": 77.43338812793806, "iter": 5780, "memory": 
16131, "step": 5780} +{"base_lr": 1.436437938090576e-06, "lr": 1.436437938090576e-06, "data_time": 0.008396387100219727, "loss": 0.039794921875, "time": 1.0186660289764404, "tflops": 6.472838539644698, "tokens_per_sec": 107.00268478513671, "iter": 5790, "memory": 16130, "step": 5790} +{"base_lr": 1.4344405387999126e-06, "lr": 1.4344405387999126e-06, "data_time": 0.008224248886108398, "loss": 1.890625, "time": 0.99997878074646, "tflops": 5.14031685352145, "tokens_per_sec": 85.00180367473854, "iter": 5800, "memory": 16131, "step": 5800} +{"base_lr": 1.4324410007359803e-06, "lr": 1.4324410007359803e-06, "data_time": 0.008633613586425781, "loss": 2.265625, "time": 1.0047602653503418, "tflops": 4.8146027034901, "tokens_per_sec": 79.62098299341666, "iter": 5810, "memory": 16130, "step": 5810} +{"base_lr": 1.4304393337426096e-06, "lr": 1.4304393337426096e-06, "data_time": 0.007999897003173828, "loss": 0.09716796875, "time": 1.0123302936553955, "tflops": 6.932281651527819, "tokens_per_sec": 114.58710731753784, "iter": 5820, "memory": 16131, "step": 5820} +{"base_lr": 1.4284355476741106e-06, "lr": 1.4284355476741106e-06, "data_time": 0.009099721908569336, "loss": 0.01422119140625, "time": 1.0132906436920166, "tflops": 6.447391740602356, "tokens_per_sec": 106.58343750849765, "iter": 5830, "memory": 16131, "step": 5830} +{"base_lr": 1.4264296523952254e-06, "lr": 1.4264296523952254e-06, "data_time": 0.008516311645507812, "loss": 0.035888671875, "time": 1.018162727355957, "tflops": 7.249660297890852, "tokens_per_sec": 119.8236752554271, "iter": 5840, "memory": 16130, "step": 5840} +{"base_lr": 1.4244216577810804e-06, "lr": 1.4244216577810804e-06, "data_time": 0.008328437805175781, "loss": 0.005523681640625, "time": 1.0046086311340332, "tflops": 6.201629765351453, "tokens_per_sec": 102.5274886237319, "iter": 5850, "memory": 16130, "step": 5850} +{"base_lr": 1.422411573717138e-06, "lr": 1.422411573717138e-06, "data_time": 0.008368492126464844, "loss": 1.6953125, "time": 
1.196803331375122, "tflops": 4.092615226102406, "tokens_per_sec": 67.68029289061525, "iter": 5860, "memory": 16130, "step": 5860} +{"base_lr": 1.4203994100991454e-06, "lr": 1.4203994100991454e-06, "data_time": 0.008900880813598633, "loss": 0.2314453125, "time": 1.014218807220459, "tflops": 5.903995171492128, "tokens_per_sec": 97.61207275500948, "iter": 5870, "memory": 16131, "step": 5870} +{"base_lr": 1.4183851768330879e-06, "lr": 1.4183851768330879e-06, "data_time": 0.00913095474243164, "loss": 0.01226806640625, "time": 1.015669822692871, "tflops": 8.043242243205121, "tokens_per_sec": 132.9172108726615, "iter": 5880, "memory": 16130, "step": 5880} +{"base_lr": 1.416368883835141e-06, "lr": 1.416368883835141e-06, "data_time": 0.008488893508911133, "loss": 2.453125, "time": 1.022817611694336, "tflops": 5.676729503763663, "tokens_per_sec": 93.85837602158465, "iter": 5890, "memory": 16131, "step": 5890} +{"base_lr": 1.414350541031619e-06, "lr": 1.414350541031619e-06, "data_time": 0.009112119674682617, "loss": 1.78125, "time": 1.0127575397491455, "tflops": 4.477750263818115, "tokens_per_sec": 74.05523736560187, "iter": 5900, "memory": 16130, "step": 5900} +{"base_lr": 1.412330158358928e-06, "lr": 1.412330158358928e-06, "data_time": 0.008639335632324219, "loss": 0.06201171875, "time": 1.0379247665405273, "tflops": 5.3023923226518415, "tokens_per_sec": 87.67494806316397, "iter": 5910, "memory": 16130, "step": 5910} +{"base_lr": 1.4103077457635167e-06, "lr": 1.4103077457635167e-06, "data_time": 0.01172947883605957, "loss": 0.0016326904296875, "time": 1.0150878429412842, "tflops": 7.032833733761803, "tokens_per_sec": 116.2460971436432, "iter": 5920, "memory": 16131, "step": 5920} +{"base_lr": 1.4082833132018268e-06, "lr": 1.4082833132018268e-06, "data_time": 0.008364439010620117, "loss": 1.703125, "time": 1.0107147693634033, "tflops": 6.044310263321613, "tokens_per_sec": 99.92928080344043, "iter": 5930, "memory": 16131, "step": 5930} +{"base_lr": 1.4062568706402451e-06, 
"lr": 1.4062568706402451e-06, "data_time": 0.0085906982421875, "loss": 0.0264892578125, "time": 1.006300449371338, "tflops": 6.732986537627047, "tokens_per_sec": 111.29876774859636, "iter": 5940, "memory": 16131, "step": 5940} +{"base_lr": 1.4042284280550534e-06, "lr": 1.4042284280550534e-06, "data_time": 0.008822441101074219, "loss": 0.0751953125, "time": 1.0154716968536377, "tflops": 6.851169262234889, "tokens_per_sec": 113.247863388222, "iter": 5950, "memory": 16131, "step": 5950} +{"base_lr": 1.402197995432379e-06, "lr": 1.402197995432379e-06, "data_time": 0.008704900741577148, "loss": 0.01361083984375, "time": 1.0038094520568848, "tflops": 5.361959585175956, "tokens_per_sec": 88.66224542670255, "iter": 5960, "memory": 16131, "step": 5960} +{"base_lr": 1.4001655827681465e-06, "lr": 1.4001655827681465e-06, "data_time": 0.00882863998413086, "loss": 0.01177978515625, "time": 1.0384149551391602, "tflops": 6.2913979567004565, "tokens_per_sec": 104.0046654426531, "iter": 5970, "memory": 16130, "step": 5970} +{"base_lr": 1.3981312000680293e-06, "lr": 1.3981312000680293e-06, "data_time": 0.008381366729736328, "loss": 1.96875, "time": 1.0334961414337158, "tflops": 5.207939824216552, "tokens_per_sec": 86.11546423042158, "iter": 5980, "memory": 16130, "step": 5980} +{"base_lr": 1.396094857347399e-06, "lr": 1.396094857347399e-06, "data_time": 0.008638858795166016, "loss": 0.048828125, "time": 1.1932485103607178, "tflops": 5.728892804822155, "tokens_per_sec": 94.69946873492891, "iter": 5990, "memory": 16131, "step": 5990} +{"base_lr": 1.394056564631276e-06, "lr": 1.394056564631276e-06, "data_time": 0.008797645568847656, "loss": 1.65625, "time": 1.102278470993042, "tflops": 3.5650732263836806, "tokens_per_sec": 58.96876489058393, "iter": 6000, "memory": 16131, "step": 6000} +{"base_lr": 1.3920163319542807e-06, "lr": 1.3920163319542807e-06, "data_time": 0.008618831634521484, "loss": 0.0296630859375, "time": 0.9960112571716309, "tflops": 6.6200664740571025, "tokens_per_sec": 
109.43651411071139, "iter": 6010, "memory": 16132, "step": 6010} +{"base_lr": 1.3899741693605843e-06, "lr": 1.3899741693605843e-06, "data_time": 0.008430957794189453, "loss": 0.019775390625, "time": 0.9893791675567627, "tflops": 6.848140329411608, "tokens_per_sec": 113.20230268893461, "iter": 6020, "memory": 16131, "step": 6020} +{"base_lr": 1.3879300869038589e-06, "lr": 1.3879300869038589e-06, "data_time": 0.008415460586547852, "loss": 1.609375, "time": 1.0048258304595947, "tflops": 4.392630032761775, "tokens_per_sec": 72.64940628221913, "iter": 6030, "memory": 16131, "step": 6030} +{"base_lr": 1.3858840946472285e-06, "lr": 1.3858840946472285e-06, "data_time": 0.008335590362548828, "loss": 1.9921875, "time": 1.016373872756958, "tflops": 7.679829452179199, "tokens_per_sec": 126.92179861919807, "iter": 6040, "memory": 16131, "step": 6040} +{"base_lr": 1.3838362026632188e-06, "lr": 1.3838362026632188e-06, "data_time": 0.008728981018066406, "loss": 2.140625, "time": 0.997981071472168, "tflops": 4.36211371472795, "tokens_per_sec": 72.14565692485262, "iter": 6050, "memory": 16131, "step": 6050} +{"base_lr": 1.3817864210337083e-06, "lr": 1.3817864210337083e-06, "data_time": 0.008628368377685547, "loss": 0.00518798828125, "time": 1.295414924621582, "tflops": 5.370610098680903, "tokens_per_sec": 88.77464495285565, "iter": 6060, "memory": 16131, "step": 6060} +{"base_lr": 1.379734759849878e-06, "lr": 1.379734759849878e-06, "data_time": 0.008481740951538086, "loss": 0.0849609375, "time": 1.0109515190124512, "tflops": 7.181487554185474, "tokens_per_sec": 118.70005410061937, "iter": 6070, "memory": 16131, "step": 6070} +{"base_lr": 1.3776812292121618e-06, "lr": 1.3776812292121618e-06, "data_time": 0.009057283401489258, "loss": 0.0478515625, "time": 1.2563891410827637, "tflops": 5.392761809917841, "tokens_per_sec": 89.14435530968422, "iter": 6080, "memory": 16130, "step": 6080} +{"base_lr": 1.3756258392301975e-06, "lr": 1.3756258392301975e-06, "data_time": 0.008758306503295898, 
"loss": 2.4375, "time": 1.3520748615264893, "tflops": 4.607888931784072, "tokens_per_sec": 76.17921383704824, "iter": 6090, "memory": 16130, "step": 6090} +{"base_lr": 1.3735686000227766e-06, "lr": 1.3735686000227766e-06, "data_time": 0.008780956268310547, "loss": 0.0111083984375, "time": 1.0023572444915771, "tflops": 6.457282933559853, "tokens_per_sec": 106.74836799744641, "iter": 6100, "memory": 16133, "step": 6100} +{"base_lr": 1.3715095217177938e-06, "lr": 1.3715095217177938e-06, "data_time": 0.008749008178710938, "loss": 0.0174560546875, "time": 1.0273091793060303, "tflops": 8.07013595188973, "tokens_per_sec": 133.3580997420982, "iter": 6110, "memory": 16130, "step": 6110} +{"base_lr": 1.3694486144521995e-06, "lr": 1.3694486144521995e-06, "data_time": 0.008884191513061523, "loss": 2.546875, "time": 1.0408477783203125, "tflops": 5.3456769934655775, "tokens_per_sec": 88.38948587504152, "iter": 6120, "memory": 16131, "step": 6120} +{"base_lr": 1.3673858883719458e-06, "lr": 1.3673858883719458e-06, "data_time": 0.00848531723022461, "loss": 0.007720947265625, "time": 1.0491399765014648, "tflops": 7.09336747359995, "tokens_per_sec": 117.23888399529595, "iter": 6130, "memory": 16131, "step": 6130} +{"base_lr": 1.3653213536319405e-06, "lr": 1.3653213536319405e-06, "data_time": 0.008714437484741211, "loss": 2.34375, "time": 0.9870307445526123, "tflops": 4.83975709489415, "tokens_per_sec": 80.03803370454078, "iter": 6140, "memory": 16131, "step": 6140} +{"base_lr": 1.3632550203959943e-06, "lr": 1.3632550203959943e-06, "data_time": 0.00822591781616211, "loss": 0.064453125, "time": 0.9959478378295898, "tflops": 4.431786437922655, "tokens_per_sec": 73.29701137663122, "iter": 6150, "memory": 16131, "step": 6150} +{"base_lr": 1.3611868988367727e-06, "lr": 1.3611868988367727e-06, "data_time": 0.008795738220214844, "loss": 1.953125, "time": 1.006230354309082, "tflops": 3.9655069141060393, "tokens_per_sec": 65.59134269533405, "iter": 6160, "memory": 16130, "step": 6160} 
+{"base_lr": 1.359116999135746e-06, "lr": 1.359116999135746e-06, "data_time": 0.008676528930664062, "loss": 1.7265625, "time": 1.0106127262115479, "tflops": 4.4273645203741205, "tokens_per_sec": 73.22290535300128, "iter": 6170, "memory": 16130, "step": 6170} +{"base_lr": 1.3570453314831373e-06, "lr": 1.3570453314831373e-06, "data_time": 0.008481740951538086, "loss": 0.0130615234375, "time": 1.2077956199645996, "tflops": 5.158331994574917, "tokens_per_sec": 85.27932896704299, "iter": 6180, "memory": 16131, "step": 6180} +{"base_lr": 1.3549719060778731e-06, "lr": 1.3549719060778731e-06, "data_time": 0.008807182312011719, "loss": 0.00994873046875, "time": 1.009511947631836, "tflops": 7.071678588169088, "tokens_per_sec": 116.88816588718286, "iter": 6190, "memory": 16130, "step": 6190} +{"base_lr": 1.3528967331275351e-06, "lr": 1.3528967331275351e-06, "data_time": 0.008340597152709961, "loss": 0.00286865234375, "time": 1.244638204574585, "tflops": 6.173968745055777, "tokens_per_sec": 102.0376841503964, "iter": 6200, "memory": 16131, "step": 6200} +{"base_lr": 1.3508198228483064e-06, "lr": 1.3508198228483064e-06, "data_time": 0.008597612380981445, "loss": 0.0216064453125, "time": 1.0065340995788574, "tflops": 7.152801964997704, "tokens_per_sec": 118.22748980851458, "iter": 6210, "memory": 16131, "step": 6210} +{"base_lr": 1.3487411854649244e-06, "lr": 1.3487411854649244e-06, "data_time": 0.008760213851928711, "loss": 1.6953125, "time": 1.0075984001159668, "tflops": 4.380543002116341, "tokens_per_sec": 72.44949971290725, "iter": 6220, "memory": 16130, "step": 6220} +{"base_lr": 1.3466608312106281e-06, "lr": 1.3466608312106281e-06, "data_time": 0.008752822875976562, "loss": 0.2197265625, "time": 0.9965662956237793, "tflops": 6.373238163285597, "tokens_per_sec": 105.3617812091188, "iter": 6230, "memory": 16130, "step": 6230} +{"base_lr": 1.3445787703271096e-06, "lr": 1.3445787703271096e-06, "data_time": 0.008851289749145508, "loss": 0.0361328125, "time": 0.9942753314971924, 
"tflops": 6.814417660571636, "tokens_per_sec": 112.64485445016155, "iter": 6240, "memory": 16130, "step": 6240} +{"base_lr": 1.3424950130644623e-06, "lr": 1.3424950130644623e-06, "data_time": 0.008968830108642578, "loss": 1.453125, "time": 1.0406074523925781, "tflops": 3.7763552970152174, "tokens_per_sec": 62.463515757540165, "iter": 6250, "memory": 16130, "step": 6250} +{"base_lr": 1.340409569681131e-06, "lr": 1.340409569681131e-06, "data_time": 0.008655309677124023, "loss": 1.3515625, "time": 1.0366618633270264, "tflops": 4.608049372527596, "tokens_per_sec": 76.2061408783612, "iter": 6260, "memory": 16131, "step": 6260} +{"base_lr": 1.3383224504438617e-06, "lr": 1.3383224504438617e-06, "data_time": 0.008957386016845703, "loss": 0.000820159912109375, "time": 1.009840726852417, "tflops": 7.549457320774725, "tokens_per_sec": 124.77215133975228, "iter": 6270, "memory": 16130, "step": 6270} +{"base_lr": 1.3362336656276498e-06, "lr": 1.3362336656276498e-06, "data_time": 0.008631706237792969, "loss": 0.0047607421875, "time": 1.0822553634643555, "tflops": 6.260451652313654, "tokens_per_sec": 103.48759061944374, "iter": 6280, "memory": 16130, "step": 6280} +{"base_lr": 1.334143225515692e-06, "lr": 1.334143225515692e-06, "data_time": 0.008478403091430664, "loss": 1.7421875, "time": 1.2590675354003906, "tflops": 3.361431546347315, "tokens_per_sec": 55.59669996390146, "iter": 6290, "memory": 16131, "step": 6290} +{"base_lr": 1.332051140399333e-06, "lr": 1.332051140399333e-06, "data_time": 0.00862431526184082, "loss": 0.061767578125, "time": 1.0123648643493652, "tflops": 6.572967490046218, "tokens_per_sec": 108.65647739620736, "iter": 6300, "memory": 16131, "step": 6300} +{"base_lr": 1.329957420578016e-06, "lr": 1.329957420578016e-06, "data_time": 0.008659124374389648, "loss": 2.09375, "time": 1.000124216079712, "tflops": 5.3817172173317624, "tokens_per_sec": 88.98894614188356, "iter": 6310, "memory": 16131, "step": 6310} +{"base_lr": 1.3278620763592319e-06, "lr": 
1.3278620763592319e-06, "data_time": 0.008481025695800781, "loss": 1.859375, "time": 1.0015993118286133, "tflops": 5.73652628772715, "tokens_per_sec": 94.84830797903032, "iter": 6320, "memory": 16131, "step": 6320} +{"base_lr": 1.3257651180584692e-06, "lr": 1.3257651180584692e-06, "data_time": 0.008529186248779297, "loss": 1.734375, "time": 1.027479648590088, "tflops": 8.77695002391332, "tokens_per_sec": 145.01503772294996, "iter": 6330, "memory": 16131, "step": 6330} +{"base_lr": 1.3236665559991618e-06, "lr": 1.3236665559991618e-06, "data_time": 0.009041786193847656, "loss": 0.921875, "time": 1.013559341430664, "tflops": 3.5189625944880816, "tokens_per_sec": 58.21070122708536, "iter": 6340, "memory": 16130, "step": 6340} +{"base_lr": 1.321566400512639e-06, "lr": 1.321566400512639e-06, "data_time": 0.00879526138305664, "loss": 1.390625, "time": 1.0352580547332764, "tflops": 3.971220653415971, "tokens_per_sec": 65.68410618882247, "iter": 6350, "memory": 16130, "step": 6350} +{"base_lr": 1.3194646619380757e-06, "lr": 1.3194646619380757e-06, "data_time": 0.00854802131652832, "loss": 1.828125, "time": 1.0195212364196777, "tflops": 9.499869819919686, "tokens_per_sec": 156.93640729027476, "iter": 6360, "memory": 16131, "step": 6360} +{"base_lr": 1.3173613506224397e-06, "lr": 1.3173613506224397e-06, "data_time": 0.009057044982910156, "loss": 2.03125, "time": 1.0132884979248047, "tflops": 7.882672115337539, "tokens_per_sec": 130.26892170413774, "iter": 6370, "memory": 16131, "step": 6370} +{"base_lr": 1.3152564769204413e-06, "lr": 1.3152564769204413e-06, "data_time": 0.009070158004760742, "loss": 2.734375, "time": 0.9920988082885742, "tflops": 5.730421414691003, "tokens_per_sec": 94.74862706675407, "iter": 6380, "memory": 16133, "step": 6380} +{"base_lr": 1.3131500511944836e-06, "lr": 1.3131500511944836e-06, "data_time": 0.008321285247802734, "loss": 1.9296875, "time": 1.0182573795318604, "tflops": 2.968057750375938, "tokens_per_sec": 49.10349878626776, "iter": 6390, 
"memory": 16131, "step": 6390} +{"base_lr": 1.3110420838146082e-06, "lr": 1.3110420838146082e-06, "data_time": 0.008952140808105469, "loss": 0.000514984130859375, "time": 1.274562120437622, "tflops": 4.935641554659665, "tokens_per_sec": 81.59665059260502, "iter": 6400, "memory": 16131, "step": 6400} +{"base_lr": 1.308932585158449e-06, "lr": 1.308932585158449e-06, "data_time": 0.008725404739379883, "loss": 1.2890625, "time": 1.0198712348937988, "tflops": 4.802621516377252, "tokens_per_sec": 79.4217909365345, "iter": 6410, "memory": 16131, "step": 6410} +{"base_lr": 1.3068215656111759e-06, "lr": 1.3068215656111759e-06, "data_time": 0.00862431526184082, "loss": 0.003692626953125, "time": 1.0135180950164795, "tflops": 6.744815745256234, "tokens_per_sec": 111.49282933922473, "iter": 6420, "memory": 16131, "step": 6420} +{"base_lr": 1.3047090355654477e-06, "lr": 1.3047090355654477e-06, "data_time": 0.008276700973510742, "loss": 2.25, "time": 1.252765417098999, "tflops": 4.8281245729161055, "tokens_per_sec": 79.82340399488992, "iter": 6430, "memory": 16132, "step": 6430} +{"base_lr": 1.3025950054213595e-06, "lr": 1.3025950054213595e-06, "data_time": 0.008724689483642578, "loss": 1.5546875, "time": 0.9982080459594727, "tflops": 3.8155256038948195, "tokens_per_sec": 63.11309576690659, "iter": 6440, "memory": 16130, "step": 6440} +{"base_lr": 1.300479485586391e-06, "lr": 1.300479485586391e-06, "data_time": 0.009004592895507812, "loss": 0.00469970703125, "time": 1.0219662189483643, "tflops": 7.22267895400731, "tokens_per_sec": 119.37772280322777, "iter": 6450, "memory": 16131, "step": 6450} +{"base_lr": 1.2983624864753557e-06, "lr": 1.2983624864753557e-06, "data_time": 0.008596181869506836, "loss": 1.7265625, "time": 1.0112452507019043, "tflops": 4.424595245082172, "tokens_per_sec": 73.177105107355, "iter": 6460, "memory": 16131, "step": 6460} +{"base_lr": 1.29624401851035e-06, "lr": 1.29624401851035e-06, "data_time": 0.008503437042236328, "loss": 0.0059814453125, "time": 
1.0081255435943604, "tflops": 6.8409930506831165, "tokens_per_sec": 113.08115415212325, "iter": 6470, "memory": 16130, "step": 6470} +{"base_lr": 1.2941240921207017e-06, "lr": 1.2941240921207017e-06, "data_time": 0.008389949798583984, "loss": 1.1484375, "time": 1.0154609680175781, "tflops": 4.704256686648544, "tokens_per_sec": 77.79718028369818, "iter": 6480, "memory": 16131, "step": 6480} +{"base_lr": 1.2920027177429183e-06, "lr": 1.2920027177429183e-06, "data_time": 0.008329153060913086, "loss": 1.640625, "time": 1.0111398696899414, "tflops": 4.724360290819216, "tokens_per_sec": 78.12964592539174, "iter": 6490, "memory": 16131, "step": 6490} +{"base_lr": 1.2898799058206351e-06, "lr": 1.2898799058206351e-06, "data_time": 0.00912165641784668, "loss": 1.8125, "time": 1.2249231338500977, "tflops": 5.284008562923264, "tokens_per_sec": 87.35241995438301, "iter": 6500, "memory": 16130, "step": 6500} +{"base_lr": 1.287755666804565e-06, "lr": 1.287755666804565e-06, "data_time": 0.008774518966674805, "loss": 2.0625, "time": 1.0250837802886963, "tflops": 6.4323139807446585, "tokens_per_sec": 106.33277210687675, "iter": 6510, "memory": 16131, "step": 6510} +{"base_lr": 1.2856300111524479e-06, "lr": 1.2856300111524479e-06, "data_time": 0.008492469787597656, "loss": 2.0, "time": 1.0141034126281738, "tflops": 4.352443809584141, "tokens_per_sec": 71.9847690983896, "iter": 6520, "memory": 16131, "step": 6520} +{"base_lr": 1.2835029493289953e-06, "lr": 1.2835029493289953e-06, "data_time": 0.008346796035766602, "loss": 0.1328125, "time": 1.2549731731414795, "tflops": 4.867891827888839, "tokens_per_sec": 80.4798079843363, "iter": 6530, "memory": 16131, "step": 6530} +{"base_lr": 1.281374491805843e-06, "lr": 1.281374491805843e-06, "data_time": 0.008779764175415039, "loss": 2.25, "time": 1.0063529014587402, "tflops": 6.190880734123112, "tokens_per_sec": 102.34978192102979, "iter": 6540, "memory": 16131, "step": 6540} +{"base_lr": 1.2792446490614984e-06, "lr": 1.2792446490614984e-06, 
"data_time": 0.008798360824584961, "loss": 0.0076904296875, "time": 1.0262205600738525, "tflops": 8.019621804224727, "tokens_per_sec": 132.52511720295308, "iter": 6550, "memory": 16131, "step": 6550} +{"base_lr": 1.2771134315812867e-06, "lr": 1.2771134315812867e-06, "data_time": 0.008794307708740234, "loss": 0.0224609375, "time": 1.3350074291229248, "tflops": 4.666798591124295, "tokens_per_sec": 77.15312870400425, "iter": 6560, "memory": 16131, "step": 6560} +{"base_lr": 1.2749808498573025e-06, "lr": 1.2749808498573025e-06, "data_time": 0.008525371551513672, "loss": 0.0242919921875, "time": 1.0311541557312012, "tflops": 6.0419780638444465, "tokens_per_sec": 99.88807146576627, "iter": 6570, "memory": 16131, "step": 6570} +{"base_lr": 1.2728469143883555e-06, "lr": 1.2728469143883555e-06, "data_time": 0.008816719055175781, "loss": 1.7109375, "time": 1.2480733394622803, "tflops": 3.5365134251438435, "tokens_per_sec": 58.49015253494063, "iter": 6580, "memory": 16131, "step": 6580} +{"base_lr": 1.2707116356799205e-06, "lr": 1.2707116356799205e-06, "data_time": 0.00945591926574707, "loss": 2.296875, "time": 1.0191524028778076, "tflops": 6.64808065927476, "tokens_per_sec": 109.89524204979817, "iter": 6590, "memory": 16131, "step": 6590} +{"base_lr": 1.2685750242440857e-06, "lr": 1.2685750242440857e-06, "data_time": 0.00853872299194336, "loss": 0.11181640625, "time": 0.9946908950805664, "tflops": 6.202571521724925, "tokens_per_sec": 102.54441907969392, "iter": 6600, "memory": 16131, "step": 6600} +{"base_lr": 1.2664370905994998e-06, "lr": 1.2664370905994998e-06, "data_time": 0.008564949035644531, "loss": 1.9609375, "time": 1.015854835510254, "tflops": 4.047072499789715, "tokens_per_sec": 66.93869795459243, "iter": 6610, "memory": 16130, "step": 6610} +{"base_lr": 1.2642978452713195e-06, "lr": 1.2642978452713195e-06, "data_time": 0.008969306945800781, "loss": 0.001434326171875, "time": 1.0077581405639648, "tflops": 6.783366494607164, "tokens_per_sec": 112.13007908489872, 
"iter": 6620, "memory": 16130, "step": 6620} +{"base_lr": 1.262157298791162e-06, "lr": 1.262157298791162e-06, "data_time": 0.008797168731689453, "loss": 1.7578125, "time": 1.210519790649414, "tflops": 4.696448831668719, "tokens_per_sec": 77.65259248631834, "iter": 6630, "memory": 16131, "step": 6630} +{"base_lr": 1.260015461697048e-06, "lr": 1.260015461697048e-06, "data_time": 0.008932113647460938, "loss": 0.02392578125, "time": 1.0112578868865967, "tflops": 5.861393092140936, "tokens_per_sec": 96.90900933452288, "iter": 6640, "memory": 16130, "step": 6640} +{"base_lr": 1.2578723445333524e-06, "lr": 1.2578723445333524e-06, "data_time": 0.008601188659667969, "loss": 1.5078125, "time": 1.0287413597106934, "tflops": 4.114026613711828, "tokens_per_sec": 68.04431389792437, "iter": 6650, "memory": 16131, "step": 6650} +{"base_lr": 1.255727957850752e-06, "lr": 1.255727957850752e-06, "data_time": 0.008272409439086914, "loss": 0.00189971923828125, "time": 1.0014965534210205, "tflops": 7.3703038478671425, "tokens_per_sec": 121.81769331420797, "iter": 6660, "memory": 16131, "step": 6660} +{"base_lr": 1.2535823122061737e-06, "lr": 1.2535823122061737e-06, "data_time": 0.009047508239746094, "loss": 0.00107574462890625, "time": 1.0018844604492188, "tflops": 7.911895399473424, "tokens_per_sec": 130.75360001205354, "iter": 6670, "memory": 16131, "step": 6670} +{"base_lr": 1.2514354181627408e-06, "lr": 1.2514354181627408e-06, "data_time": 0.009181499481201172, "loss": 1.3046875, "time": 1.3078892230987549, "tflops": 3.3284981954316213, "tokens_per_sec": 55.05053388952685, "iter": 6680, "memory": 16131, "step": 6680} +{"base_lr": 1.2492872862897252e-06, "lr": 1.2492872862897252e-06, "data_time": 0.008400201797485352, "loss": 0.00347900390625, "time": 1.020009994506836, "tflops": 6.820686575249584, "tokens_per_sec": 112.74399331301507, "iter": 6690, "memory": 16131, "step": 6690} +{"base_lr": 1.2471379271624905e-06, "lr": 1.2471379271624905e-06, "data_time": 0.009161949157714844, 
"loss": 1.40625, "time": 1.010868787765503, "tflops": 4.965163142553522, "tokens_per_sec": 82.10759002994548, "iter": 6700, "memory": 16131, "step": 6700} +{"base_lr": 1.2449873513624445e-06, "lr": 1.2449873513624445e-06, "data_time": 0.008266925811767578, "loss": 1.40625, "time": 1.0034635066986084, "tflops": 4.036743706200971, "tokens_per_sec": 66.76874600090142, "iter": 6710, "memory": 16130, "step": 6710} +{"base_lr": 1.2428355694769818e-06, "lr": 1.2428355694769818e-06, "data_time": 0.008746623992919922, "loss": 0.005340576171875, "time": 1.2854185104370117, "tflops": 6.355345248806386, "tokens_per_sec": 105.02416053896577, "iter": 6720, "memory": 16133, "step": 6720} +{"base_lr": 1.2406825920994362e-06, "lr": 1.2406825920994362e-06, "data_time": 0.008968591690063477, "loss": 1.9453125, "time": 1.2474446296691895, "tflops": 5.820006418721365, "tokens_per_sec": 96.19665446130996, "iter": 6730, "memory": 16131, "step": 6730} +{"base_lr": 1.238528429829027e-06, "lr": 1.238528429829027e-06, "data_time": 0.008239030838012695, "loss": 2.171875, "time": 1.0114555358886719, "tflops": 5.560886402810219, "tokens_per_sec": 91.94670126373632, "iter": 6740, "memory": 16131, "step": 6740} +{"base_lr": 1.2363730932708064e-06, "lr": 1.2363730932708064e-06, "data_time": 0.009282588958740234, "loss": 1.765625, "time": 1.0316145420074463, "tflops": 4.2198968138201725, "tokens_per_sec": 69.79351014170804, "iter": 6750, "memory": 16130, "step": 6750} +{"base_lr": 1.2342165930356085e-06, "lr": 1.2342165930356085e-06, "data_time": 0.008162736892700195, "loss": 1.5, "time": 1.0198240280151367, "tflops": 4.446723372091645, "tokens_per_sec": 73.54209936188448, "iter": 6760, "memory": 16130, "step": 6760} +{"base_lr": 1.2320589397399943e-06, "lr": 1.2320589397399943e-06, "data_time": 0.008396625518798828, "loss": 1.78125, "time": 0.9877581596374512, "tflops": 4.28472221786468, "tokens_per_sec": 70.86754922441955, "iter": 6770, "memory": 16131, "step": 6770} +{"base_lr": 
1.229900144006204e-06, "lr": 1.229900144006204e-06, "data_time": 0.00878143310546875, "loss": 1.9453125, "time": 1.009807825088501, "tflops": 4.370958523900124, "tokens_per_sec": 72.29098268626497, "iter": 6780, "memory": 16132, "step": 6780} +{"base_lr": 1.2277402164621008e-06, "lr": 1.2277402164621008e-06, "data_time": 0.008890390396118164, "loss": 1.9375, "time": 1.0321059226989746, "tflops": 6.74075046285961, "tokens_per_sec": 111.42267229623255, "iter": 6790, "memory": 16131, "step": 6790} +{"base_lr": 1.2255791677411194e-06, "lr": 1.2255791677411194e-06, "data_time": 0.008545875549316406, "loss": 1.6953125, "time": 1.0121588706970215, "tflops": 5.138274242593333, "tokens_per_sec": 84.96689846791668, "iter": 6800, "memory": 16130, "step": 6800} +{"base_lr": 1.2234170084822145e-06, "lr": 1.2234170084822145e-06, "data_time": 0.008949518203735352, "loss": 1.9765625, "time": 1.0085861682891846, "tflops": 4.256242508418878, "tokens_per_sec": 70.39557177386581, "iter": 6810, "memory": 16131, "step": 6810} +{"base_lr": 1.221253749329808e-06, "lr": 1.221253749329808e-06, "data_time": 0.008491754531860352, "loss": 2.328125, "time": 1.015773057937622, "tflops": 6.670197959553877, "tokens_per_sec": 110.26084923662897, "iter": 6820, "memory": 16130, "step": 6820} +{"base_lr": 1.2190894009337371e-06, "lr": 1.2190894009337371e-06, "data_time": 0.0074138641357421875, "loss": 1.6171875, "time": 0.9840269088745117, "tflops": 5.592818919849518, "tokens_per_sec": 92.47714587804258, "iter": 6830, "memory": 16131, "step": 6830} +{"base_lr": 1.2169239739492003e-06, "lr": 1.2169239739492003e-06, "data_time": 0.008954763412475586, "loss": 0.11181640625, "time": 1.0163459777832031, "tflops": 6.189606593757483, "tokens_per_sec": 102.32735925883884, "iter": 6840, "memory": 16131, "step": 6840} +{"base_lr": 1.2147574790367079e-06, "lr": 1.2147574790367079e-06, "data_time": 0.008650541305541992, "loss": 2.4375, "time": 1.0103769302368164, "tflops": 5.686690392592012, "tokens_per_sec": 
94.0243162298247, "iter": 6850, "memory": 16131, "step": 6850} +{"base_lr": 1.2125899268620254e-06, "lr": 1.2125899268620254e-06, "data_time": 0.0076351165771484375, "loss": 0.001922607421875, "time": 1.0024707317352295, "tflops": 8.572488317726934, "tokens_per_sec": 141.65002079817637, "iter": 6860, "memory": 16131, "step": 6860} +{"base_lr": 1.2104213280961249e-06, "lr": 1.2104213280961249e-06, "data_time": 0.00854182243347168, "loss": 1.578125, "time": 1.2346653938293457, "tflops": 5.389509882346596, "tokens_per_sec": 89.09296441746305, "iter": 6870, "memory": 16130, "step": 6870} +{"base_lr": 1.2082516934151308e-06, "lr": 1.2082516934151308e-06, "data_time": 0.008342742919921875, "loss": 0.00128936767578125, "time": 1.037912368774414, "tflops": 7.637258493019481, "tokens_per_sec": 126.21489437934049, "iter": 6880, "memory": 16131, "step": 6880} +{"base_lr": 1.2060810335002664e-06, "lr": 1.2060810335002664e-06, "data_time": 0.008574724197387695, "loss": 1.765625, "time": 1.0378928184509277, "tflops": 4.777556122209229, "tokens_per_sec": 79.00623122367043, "iter": 6890, "memory": 16131, "step": 6890} +{"base_lr": 1.2039093590378037e-06, "lr": 1.2039093590378037e-06, "data_time": 0.009007453918457031, "loss": 0.0113525390625, "time": 1.015631914138794, "tflops": 7.745119679290895, "tokens_per_sec": 127.9991286115754, "iter": 6900, "memory": 16131, "step": 6900} +{"base_lr": 1.2017366807190075e-06, "lr": 1.2017366807190075e-06, "data_time": 0.008626937866210938, "loss": 0.03271484375, "time": 1.0266172885894775, "tflops": 7.426087163043237, "tokens_per_sec": 122.73317564425119, "iter": 6910, "memory": 16131, "step": 6910} +{"base_lr": 1.1995630092400859e-06, "lr": 1.1995630092400859e-06, "data_time": 0.008728504180908203, "loss": 0.09228515625, "time": 1.0216865539550781, "tflops": 6.512996883118483, "tokens_per_sec": 107.66511468127713, "iter": 6920, "memory": 16130, "step": 6920} +{"base_lr": 1.1973883553021364e-06, "lr": 1.1973883553021364e-06, "data_time": 
0.00915384292602539, "loss": 0.0157470703125, "time": 1.0247185230255127, "tflops": 5.252550424535207, "tokens_per_sec": 86.8531191737785, "iter": 6930, "memory": 16132, "step": 6930} +{"base_lr": 1.1952127296110933e-06, "lr": 1.1952127296110933e-06, "data_time": 0.008169412612915039, "loss": 1.8984375, "time": 1.0118365287780762, "tflops": 6.157329382932666, "tokens_per_sec": 101.79509937665925, "iter": 6940, "memory": 16130, "step": 6940} +{"base_lr": 1.193036142877673e-06, "lr": 1.193036142877673e-06, "data_time": 0.008893966674804688, "loss": 0.006744384765625, "time": 1.0236499309539795, "tflops": 7.210799002766341, "tokens_per_sec": 119.18136885544868, "iter": 6950, "memory": 16132, "step": 6950} +{"base_lr": 1.1908586058173264e-06, "lr": 1.1908586058173264e-06, "data_time": 0.00897526741027832, "loss": 1.2890625, "time": 1.2292370796203613, "tflops": 3.3445445445254336, "tokens_per_sec": 55.31886495072688, "iter": 6960, "memory": 16133, "step": 6960} +{"base_lr": 1.1886801291501806e-06, "lr": 1.1886801291501806e-06, "data_time": 0.008398294448852539, "loss": 1.6328125, "time": 1.0134379863739014, "tflops": 5.550008399325279, "tokens_per_sec": 91.76683847490642, "iter": 6970, "memory": 16131, "step": 6970} +{"base_lr": 1.1865007236009887e-06, "lr": 1.1865007236009887e-06, "data_time": 0.008649587631225586, "loss": 2.453125, "time": 1.028923749923706, "tflops": 6.231684841086823, "tokens_per_sec": 103.02026754437031, "iter": 6980, "memory": 16131, "step": 6980} +{"base_lr": 1.1843203998990779e-06, "lr": 1.1843203998990779e-06, "data_time": 0.009035348892211914, "loss": 1.90625, "time": 1.2918763160705566, "tflops": 5.197729437086505, "tokens_per_sec": 85.92153801343606, "iter": 6990, "memory": 16131, "step": 6990} +{"base_lr": 1.1821391687782938e-06, "lr": 1.1821391687782938e-06, "data_time": 0.008224248886108398, "loss": 1.53125, "time": 1.0343372821807861, "tflops": 4.150278054200058, "tokens_per_sec": 68.64298640597744, "iter": 7000, "memory": 16130, 
"step": 7000} +{"base_lr": 1.1799570409769508e-06, "lr": 1.1799570409769508e-06, "data_time": 0.008670806884765625, "loss": 0.033447265625, "time": 0.9976944923400879, "tflops": 5.516201959465654, "tokens_per_sec": 91.21028601297448, "iter": 7010, "memory": 16131, "step": 7010} +{"base_lr": 1.1777740272377766e-06, "lr": 1.1777740272377766e-06, "data_time": 0.008645057678222656, "loss": 1.8359375, "time": 1.198145866394043, "tflops": 7.830457781362184, "tokens_per_sec": 129.36655239345845, "iter": 7020, "memory": 16131, "step": 7020} +{"base_lr": 1.175590138307861e-06, "lr": 1.175590138307861e-06, "data_time": 0.009027957916259766, "loss": 0.003509521484375, "time": 1.025482416152954, "tflops": 7.197913669739464, "tokens_per_sec": 118.96839777863565, "iter": 7030, "memory": 16131, "step": 7030} +{"base_lr": 1.1734053849386026e-06, "lr": 1.1734053849386026e-06, "data_time": 0.008687019348144531, "loss": 0.00390625, "time": 1.267059326171875, "tflops": 6.734520215540222, "tokens_per_sec": 111.28129290195702, "iter": 7040, "memory": 16131, "step": 7040} +{"base_lr": 1.1712197778856549e-06, "lr": 1.1712197778856549e-06, "data_time": 0.008668661117553711, "loss": 1.875, "time": 1.0657057762145996, "tflops": 5.391451290133202, "tokens_per_sec": 89.14280293886748, "iter": 7050, "memory": 16130, "step": 7050} +{"base_lr": 1.1690333279088744e-06, "lr": 1.1690333279088744e-06, "data_time": 0.008703470230102539, "loss": 1.5, "time": 1.0214269161224365, "tflops": 5.625170720864035, "tokens_per_sec": 93.00714373236617, "iter": 7060, "memory": 16130, "step": 7060} +{"base_lr": 1.1668460457722682e-06, "lr": 1.1668460457722682e-06, "data_time": 0.008987188339233398, "loss": 0.01123046875, "time": 1.2627887725830078, "tflops": 5.653316041104631, "tokens_per_sec": 93.44397302372275, "iter": 7070, "memory": 16131, "step": 7070} +{"base_lr": 1.1646579422439397e-06, "lr": 1.1646579422439397e-06, "data_time": 0.008894681930541992, "loss": 0.004669189453125, "time": 1.2538714408874512, 
"tflops": 5.838504478970167, "tokens_per_sec": 96.50112129060334, "iter": 7080, "memory": 16131, "step": 7080} +{"base_lr": 1.1624690280960355e-06, "lr": 1.1624690280960355e-06, "data_time": 0.008741140365600586, "loss": 1.375, "time": 1.016202449798584, "tflops": 4.40301135738694, "tokens_per_sec": 72.82013541159472, "iter": 7090, "memory": 16130, "step": 7090} +{"base_lr": 1.160279314104693e-06, "lr": 1.160279314104693e-06, "data_time": 0.008597373962402344, "loss": 2.421875, "time": 1.0332677364349365, "tflops": 6.322738528070799, "tokens_per_sec": 104.5227642281039, "iter": 7100, "memory": 16131, "step": 7100} +{"base_lr": 1.1580888110499876e-06, "lr": 1.1580888110499876e-06, "data_time": 0.008908748626708984, "loss": 2.09375, "time": 1.293395757675171, "tflops": 3.833777217501504, "tokens_per_sec": 63.39900182394941, "iter": 7110, "memory": 16131, "step": 7110} +{"base_lr": 1.15589752971588e-06, "lr": 1.15589752971588e-06, "data_time": 0.008710384368896484, "loss": 1.515625, "time": 1.0113894939422607, "tflops": 5.0823226962731, "tokens_per_sec": 84.04279509429877, "iter": 7120, "memory": 16130, "step": 7120} +{"base_lr": 1.1537054808901603e-06, "lr": 1.1537054808901603e-06, "data_time": 0.007485389709472656, "loss": 0.0341796875, "time": 0.9707939624786377, "tflops": 6.604829431223919, "tokens_per_sec": 109.18897736987455, "iter": 7130, "memory": 16131, "step": 7130} +{"base_lr": 1.1515126753643995e-06, "lr": 1.1515126753643995e-06, "data_time": 0.008643627166748047, "loss": 1.5234375, "time": 1.2587590217590332, "tflops": 3.65073956167923, "tokens_per_sec": 60.37692575480778, "iter": 7140, "memory": 16130, "step": 7140} +{"base_lr": 1.149319123933893e-06, "lr": 1.149319123933893e-06, "data_time": 0.00826573371887207, "loss": 0.05615234375, "time": 1.0186567306518555, "tflops": 6.770268757299618, "tokens_per_sec": 111.91208634820248, "iter": 7150, "memory": 16130, "step": 7150} +{"base_lr": 1.1471248373976072e-06, "lr": 1.1471248373976072e-06, "data_time": 
0.007601499557495117, "loss": 0.01275634765625, "time": 0.9586939811706543, "tflops": 6.625007011891096, "tokens_per_sec": 109.52400042365525, "iter": 7160, "memory": 16131, "step": 7160} +{"base_lr": 1.1449298265581282e-06, "lr": 1.1449298265581282e-06, "data_time": 0.008692264556884766, "loss": 0.007659912109375, "time": 1.035008430480957, "tflops": 6.604770168096876, "tokens_per_sec": 109.17785466479819, "iter": 7170, "memory": 16131, "step": 7170} +{"base_lr": 1.142734102221609e-06, "lr": 1.142734102221609e-06, "data_time": 0.00858449935913086, "loss": 1.625, "time": 1.0312988758087158, "tflops": 6.687275628548714, "tokens_per_sec": 110.54021552238562, "iter": 7180, "memory": 16131, "step": 7180} +{"base_lr": 1.1405376751977135e-06, "lr": 1.1405376751977135e-06, "data_time": 0.008353710174560547, "loss": 1.4453125, "time": 1.278451919555664, "tflops": 2.9318139811873243, "tokens_per_sec": 48.49615308294119, "iter": 7190, "memory": 16131, "step": 7190} +{"base_lr": 1.138340556299566e-06, "lr": 1.138340556299566e-06, "data_time": 0.008736371994018555, "loss": 1.2265625, "time": 1.1909384727478027, "tflops": 6.350574224496689, "tokens_per_sec": 104.95924253037836, "iter": 7200, "memory": 16131, "step": 7200} +{"base_lr": 1.1361427563436964e-06, "lr": 1.1361427563436964e-06, "data_time": 0.008352994918823242, "loss": 0.01416015625, "time": 1.0245954990386963, "tflops": 6.790161075937398, "tokens_per_sec": 112.23941556232086, "iter": 7210, "memory": 16131, "step": 7210} +{"base_lr": 1.1339442861499875e-06, "lr": 1.1339442861499875e-06, "data_time": 0.009007692337036133, "loss": 0.03173828125, "time": 1.0156433582305908, "tflops": 7.983764779706297, "tokens_per_sec": 131.9360766886882, "iter": 7220, "memory": 16131, "step": 7220} +{"base_lr": 1.1317451565416225e-06, "lr": 1.1317451565416225e-06, "data_time": 0.008941411972045898, "loss": 1.7109375, "time": 1.0287396907806396, "tflops": 5.173163696379255, "tokens_per_sec": 85.54156196028299, "iter": 7230, "memory": 
16131, "step": 7230} +{"base_lr": 1.1295453783450298e-06, "lr": 1.1295453783450298e-06, "data_time": 0.008691787719726562, "loss": 2.0, "time": 1.018434762954712, "tflops": 4.155660712323942, "tokens_per_sec": 68.73292482362372, "iter": 7240, "memory": 16130, "step": 7240} +{"base_lr": 1.1273449623898317e-06, "lr": 1.1273449623898317e-06, "data_time": 0.009306669235229492, "loss": 0.0133056640625, "time": 1.0010392665863037, "tflops": 7.192064612866118, "tokens_per_sec": 118.8764556716034, "iter": 7250, "memory": 16131, "step": 7250} +{"base_lr": 1.12514391950879e-06, "lr": 1.12514391950879e-06, "data_time": 0.009049415588378906, "loss": 2.375, "time": 1.0139679908752441, "tflops": 6.861329488466199, "tokens_per_sec": 113.4158090144641, "iter": 7260, "memory": 16130, "step": 7260} +{"base_lr": 1.1229422605377525e-06, "lr": 1.1229422605377525e-06, "data_time": 0.008225202560424805, "loss": 1.625, "time": 1.0032596588134766, "tflops": 8.20316127257129, "tokens_per_sec": 135.5581267572418, "iter": 7270, "memory": 16131, "step": 7270} +{"base_lr": 1.1207399963156e-06, "lr": 1.1207399963156e-06, "data_time": 0.008573055267333984, "loss": 0.025390625, "time": 0.9986610412597656, "tflops": 7.694660206480907, "tokens_per_sec": 127.17027575208911, "iter": 7280, "memory": 16131, "step": 7280} +{"base_lr": 1.1185371376841937e-06, "lr": 1.1185371376841937e-06, "data_time": 0.008667945861816406, "loss": 0.031494140625, "time": 1.06719970703125, "tflops": 5.440648901063218, "tokens_per_sec": 89.95504718321567, "iter": 7290, "memory": 16131, "step": 7290} +{"base_lr": 1.1163336954883208e-06, "lr": 1.1163336954883208e-06, "data_time": 0.008593082427978516, "loss": 0.1728515625, "time": 1.0141985416412354, "tflops": 6.142989299992254, "tokens_per_sec": 101.55802416479304, "iter": 7300, "memory": 16130, "step": 7300} +{"base_lr": 1.1141296805756414e-06, "lr": 1.1141296805756414e-06, "data_time": 0.008672714233398438, "loss": 2.0, "time": 1.0428204536437988, "tflops": 
4.9871958653502375, "tokens_per_sec": 82.4686547903988, "iter": 7310, "memory": 16130, "step": 7310} +{"base_lr": 1.1119251037966345e-06, "lr": 1.1119251037966345e-06, "data_time": 0.00830388069152832, "loss": 1.953125, "time": 1.371347188949585, "tflops": 3.792438484305234, "tokens_per_sec": 62.712054753844626, "iter": 7320, "memory": 16130, "step": 7320} +{"base_lr": 1.1097199760045462e-06, "lr": 1.1097199760045462e-06, "data_time": 0.008495330810546875, "loss": 2.21875, "time": 1.2626380920410156, "tflops": 3.0643738160112854, "tokens_per_sec": 50.68752511378401, "iter": 7330, "memory": 16131, "step": 7330} +{"base_lr": 1.1075143080553346e-06, "lr": 1.1075143080553346e-06, "data_time": 0.008533716201782227, "loss": 0.035400390625, "time": 1.0007781982421875, "tflops": 7.860074029319059, "tokens_per_sec": 129.89891289419376, "iter": 7340, "memory": 16131, "step": 7340} +{"base_lr": 1.105308110807617e-06, "lr": 1.105308110807617e-06, "data_time": 0.008748769760131836, "loss": 0.04833984375, "time": 1.0189363956451416, "tflops": 5.460631346614779, "tokens_per_sec": 90.29022850995496, "iter": 7350, "memory": 16131, "step": 7350} +{"base_lr": 1.103101395122617e-06, "lr": 1.103101395122617e-06, "data_time": 0.008434772491455078, "loss": 0.00531005859375, "time": 1.3048782348632812, "tflops": 5.610281348927105, "tokens_per_sec": 92.72895873888574, "iter": 7360, "memory": 16130, "step": 7360} +{"base_lr": 1.1008941718641105e-06, "lr": 1.1008941718641105e-06, "data_time": 0.008824586868286133, "loss": 0.0283203125, "time": 1.007415533065796, "tflops": 7.56763144692798, "tokens_per_sec": 125.0725205878334, "iter": 7370, "memory": 16130, "step": 7370} +{"base_lr": 1.0986864518983714e-06, "lr": 1.0986864518983714e-06, "data_time": 0.008580923080444336, "loss": 0.00927734375, "time": 1.2364389896392822, "tflops": 5.185802606326273, "tokens_per_sec": 85.73006908398983, "iter": 7380, "memory": 16131, "step": 7380} +{"base_lr": 1.096478246094119e-06, "lr": 
1.096478246094119e-06, "data_time": 0.008475542068481445, "loss": 1.4140625, "time": 1.0540924072265625, "tflops": 4.30216109110156, "tokens_per_sec": 71.15125721971796, "iter": 7390, "memory": 16130, "step": 7390} +{"base_lr": 1.0942695653224662e-06, "lr": 1.0942695653224662e-06, "data_time": 0.008368968963623047, "loss": 0.0093994140625, "time": 1.001171350479126, "tflops": 6.94902872802302, "tokens_per_sec": 114.86545229730166, "iter": 7400, "memory": 16131, "step": 7400} +{"base_lr": 1.0920604204568615e-06, "lr": 1.0920604204568615e-06, "data_time": 0.008386850357055664, "loss": 0.08935546875, "time": 1.0105645656585693, "tflops": 6.64462596977899, "tokens_per_sec": 109.83959241392277, "iter": 7410, "memory": 16131, "step": 7410} +{"base_lr": 1.0898508223730397e-06, "lr": 1.0898508223730397e-06, "data_time": 0.008121013641357422, "loss": 0.020263671875, "time": 1.0328259468078613, "tflops": 6.560066969043606, "tokens_per_sec": 108.44034306656233, "iter": 7420, "memory": 16131, "step": 7420} +{"base_lr": 1.0876407819489668e-06, "lr": 1.0876407819489668e-06, "data_time": 0.008166790008544922, "loss": 0.0038909912109375, "time": 1.0209472179412842, "tflops": 5.390566933217423, "tokens_per_sec": 89.13291343641663, "iter": 7430, "memory": 16131, "step": 7430} +{"base_lr": 1.0854303100647869e-06, "lr": 1.0854303100647869e-06, "data_time": 0.008682727813720703, "loss": 0.1103515625, "time": 1.058741569519043, "tflops": 5.770127318727471, "tokens_per_sec": 95.39627318665319, "iter": 7440, "memory": 16130, "step": 7440} +{"base_lr": 1.0832194176027665e-06, "lr": 1.0832194176027665e-06, "data_time": 0.008159399032592773, "loss": 2.1875, "time": 1.0463128089904785, "tflops": 5.722899394153138, "tokens_per_sec": 94.61797576140184, "iter": 7450, "memory": 16131, "step": 7450} diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt 
b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b888a2ceef201da27cf9ced32c476c91c109fa5 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5842f4d284346e6db2ad5997038da4344f71e98951f885bc230ec8ef6b56b7 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f632ad44f7a030e40cb3ebefc81529bb056e3fb7 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12be85db2d909747eabc50fc0351de056ffca5b6bce768099cff1509e55b3976 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9a463d90cd9ea8e425f0ff327fadb9b47215ff7 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac680c0853d9ad64f02e09c742329b769c6089f5cf8a3da97859304c236f4f6a +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1515a1df37a9300ae068767b0e3df5f0af1a1fb7 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa8684a6a4155aef482387caf4a86fe66eb2388b98c6444c6f119e857d216a6 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ffab3b70c60fc8245561aef00a588cd096f2d9a --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd3846b8336309c1fb952f3fdf3d435c202b6a8c62b24b54797faa35161e5b17 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c29ad3a0542186a9fa289db208812c10b6b135c5 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd26f8cda580dd803ee810b47caaf7ed39e4de12815ffaf2b9d397132ed12527 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4c0c90484131d7422b24024161f7d2bad2395d6 --- /dev/null +++ 
b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debad177fe141a327e4fa1fd11aea62f61b3a7d06c31aa808838473342882085 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ba074fc999dc08a195da901bb0cff833a21e6f9 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf4d6100a49c554e31933d3243a5e5b2f1366d8314e8bfa718aae3471d4917d +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/mp_rank_00_model_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..98ed3bfa0dbb24626e97f70619347e3a3a0812c0 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_4096.pth/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98315f72ba79f7808402bc6aa4be55a8a80cc87b6515dc5714449dc6d0e53db5 +size 816343368 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4aa230241ccf0e0d7a711254244fb6139eca1f8 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1cfe182e40920254b6b0fe68dd569cd5fd34cb701a64ba9a8501667b12f7d878 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16b776be37f268014f5c4b1df5bbf54f628981ac --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7870fa18f29473e40c786a3a02167012e510c579682ea0146c60d747599b46da +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce5c68cebe3edc7bab1064c0011d4db32d54071c --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165d183b68447ad9d4b5d2c6536820b7de1a0b90e732760775cb29e235752755 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa9988ac722cf72b8f50457ef2329c3aaeb51a67 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d52fcbfca606673d5e4e3ec3adaf5ddc8f113b03c09b1cb8c7146e0a09ea2f +size 612302570 diff --git 
a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..784b802ce019cb4bd6e1680c0d32ff1cfb1262b4 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cd739e387326c6a2b6917b94469b128f65a1a651812c0b7a5360183179d4dd +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb59fafdd88c89d04e35cb10083d79f080e6d266 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14529d0a241e44b3b84ff31847263c61eeddb5363cbb832400c3dcfaeb217393 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3bda1830fa21b40efe20a6b7638f9423877f69c --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e65c2783921f2b01719c81e0d0bb25179af58996475a7d1da0a49f86183095 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt 
b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9c3da8eeaf34dcc1e4f6ef2caad7794f1713746 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba09e83a6d8404a6044c6d20e583494fb34379aea9e07b42d408d3cb1706cec +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/mp_rank_00_model_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7940a6f145d43d5de95be01a2bf1533a5656ee63 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/iter_7452.pth/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50da5073632e92ab4e32249ac8ebb32249e31179d63f6cbe16cdf6664176001 +size 816784840 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/last_checkpoint b/stage_2/multi_stage2_run_stage1_both/stage2a/last_checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..44496ebb011dc0316a11747d5ba6ae974157302c --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/last_checkpoint @@ -0,0 +1 @@ +/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a/iter_7452.pth \ No newline at end of file diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/temp_config_stage_2a.py b/stage_2/multi_stage2_run_stage1_both/stage2a/temp_config_stage_2a.py new file mode 100644 index 0000000000000000000000000000000000000000..348fa4a2ed2b288a9b954eaaa7658bcf2c9460ca --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/temp_config_stage_2a.py @@ -0,0 +1,261 @@ +SYSTEM = '' +accumulative_counts = 64 
+batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = None +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + 
llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-06, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] +per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' 
+save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2a_easy.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), 
+ num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a' diff --git a/stage_2/multi_stage2_run_stage1_both/stage2a/zero_to_fp32.py b/stage_2/multi_stage2_run_stage1_both/stage2a/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2a/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/20250925_230352.log b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/20250925_230352.log new file mode 100644 index 0000000000000000000000000000000000000000..48c1bf0062900aefa8f4100b092122a16b85dee6 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/20250925_230352.log @@ -0,0 +1,1723 @@ +2025/09/25 23:03:52 - mmengine - INFO - +------------------------------------------------------------ +System environment: + sys.platform: linux + Python: 3.11.2 (main, May 2 2024, 11:59:08) [GCC 12.2.0] + CUDA available: True + MUSA available: False + numpy_random_seed: 1093020178 + GPU 0,1,2,3,4,5,6,7: NVIDIA H100 80GB HBM3 + CUDA_HOME: /usr/local/cuda + NVCC: Cuda compilation tools, release 12.4, V12.4.131 + GCC: x86_64-linux-gnu-gcc (Debian 12.2.0-14) 12.2.0 + PyTorch: 2.4.1 + PyTorch compiling details: PyTorch built with: + - GCC 12.2 + - C++ Version: 201703 + - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications + - Intel(R) MKL-DNN v3.4.2 (Git Hash 1137e04ec0b5251ca2b4400a4fd3c667ce843d67) + - OpenMP 201511 (a.k.a. 
OpenMP 4.5) + - LAPACK is enabled (usually provided by MKL) + - NNPACK is enabled + - CPU capability usage: AVX512 + - CUDA Runtime 12.4 + - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90 + - CuDNN 90.4 (built against CUDA 12.6) + - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.4, CUDNN_VERSION=9.4.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0 -Wno-uninitialized -Wno-maybe-uninitialized -Wno-nonnull -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, + + TorchVision: 0.19.1+cu124 + OpenCV: 4.10.0 + MMEngine: 0.10.7 + +Runtime environment: + launcher: pytorch + randomness: {'deterministic': False, 'seed': None} + cudnn_benchmark: False + dist_cfg: {'backend': 'nccl'} + mp_cfg: 
{'mp_start_method': 'fork', 'opencv_num_threads': 0} + deterministic: False + seed: None + Distributed launcher: pytorch + Distributed training: True + GPU number: 8 +------------------------------------------------------------ + +2025/09/25 23:03:53 - mmengine - INFO - Config: +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a/iter_7452.pth' +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + 
pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-05, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] 
+per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' +save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2b_medium.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + 
type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), + num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b' + +2025/09/25 23:03:54 - mmengine - WARNING - Failed to search registry with scope "mmengine" in the "builder" registry tree. As a workaround, the current "builder" registry in "xtuner" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "mmengine" is a correct scope, or whether the registry is initialized. +2025/09/25 23:03:55 - mmengine - INFO - Hooks will be executed in the following order: +before_run: +(VERY_HIGH ) RuntimeInfoHook +(55 ) ThroughputHook +(BELOW_NORMAL) LoggerHook + -------------------- +before_train: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(NORMAL ) DatasetInfoHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +before_train_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(NORMAL ) DistSamplerSeedHook + -------------------- +before_train_iter: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook + -------------------- +after_train_iter: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(55 ) ThroughputHook +(BELOW_NORMAL) LoggerHook +(LOW ) ParamSchedulerHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +after_train_epoch: +(NORMAL ) IterTimerHook +(LOW ) ParamSchedulerHook +(VERY_LOW ) CheckpointHook + -------------------- +before_val: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) DatasetInfoHook + 
-------------------- +before_val_epoch: +(NORMAL ) IterTimerHook + -------------------- +before_val_iter: +(NORMAL ) IterTimerHook + -------------------- +after_val_iter: +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_val_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook +(LOW ) ParamSchedulerHook +(VERY_LOW ) CheckpointHook + -------------------- +after_val: +(VERY_HIGH ) RuntimeInfoHook +(LOW ) EvaluateChatHook + -------------------- +after_train: +(VERY_HIGH ) RuntimeInfoHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +before_test: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) DatasetInfoHook + -------------------- +before_test_epoch: +(NORMAL ) IterTimerHook + -------------------- +before_test_iter: +(NORMAL ) IterTimerHook + -------------------- +after_test_iter: +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_test_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_test: +(VERY_HIGH ) RuntimeInfoHook + -------------------- +after_run: +(BELOW_NORMAL) LoggerHook + -------------------- +2025/09/25 23:03:57 - mmengine - INFO - Loading unwanted prefixes from: /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv +2025/09/25 23:03:57 - mmengine - INFO - Loaded 210 prefixes to filter out. +2025/09/25 23:03:57 - mmengine - INFO - Filtered out 712 samples. +2025/09/25 23:03:57 - mmengine - INFO - [DEBUG] dataset full size used. +2025/09/25 23:03:57 - mmengine - INFO - xtuner_dataset_timeout = 1:00:00 +2025/09/25 23:04:50 - mmengine - WARNING - Dataset LLaVADataset has no metainfo. ``dataset_meta`` in visualizer will be None. 
+2025/09/25 23:04:50 - mmengine - INFO - train_stage == 2 +2025/09/25 23:05:05 - mmengine - INFO - using simple Resampler with 100 latents +2025/09/25 23:05:38 - mmengine - INFO - enable projector input require grads +2025/09/25 23:05:38 - mmengine - INFO - enable input required grads for projector +2025/09/25 23:05:38 - mmengine - INFO - Building lora +2025/09/25 23:05:41 - mmengine - INFO - loading token_merge from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors +2025/09/25 23:05:41 - mmengine - INFO - Loading projector from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors +2025/09/25 23:05:41 - mmengine - INFO - Loading resampler from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.embed_tokens.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - 
mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.0.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - 
mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.1.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - 
mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.2.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - 
mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.3.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine 
- WARNING - llm.base_model.model.model.layers.4.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.4.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - 
WARNING - llm.base_model.model.model.layers.5.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.5.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.6.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - 
llm.base_model.model.model.layers.6.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.7.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - 
llm.base_model.model.model.layers.8.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.8.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - 
llm.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.9.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - 
llm.base_model.model.model.layers.10.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.10.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - 
WARNING - llm.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.11.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - 
mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.12.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 
23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.13.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False 
+2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.14.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.o_proj.base_layer.weight is skipped since its 
requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.15.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.gate_proj.base_layer.weight is skipped 
since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.16.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.up_proj.base_layer.weight is 
skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.17.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - 
llm.base_model.model.model.layers.18.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.18.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - 
WARNING - llm.base_model.model.model.layers.19.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.19.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.20.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - 
WARNING - llm.base_model.model.model.layers.20.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.21.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - 
mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.22.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 
23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.23.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False 
+2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.24.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.k_proj.base_layer.weight is skipped since its 
requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.25.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias is skipped 
since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.26.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.v_proj.base_layer.weight 
is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.input_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.layers.27.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.model.norm.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - WARNING - llm.base_model.model.lm_head.weight is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- token_merge.ln_in.weight:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- token_merge.ln_in.bias:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- token_merge.ln_out.weight:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- token_merge.ln_out.bias:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- projector.model.0.bias:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- projector.model.2.bias:weight_decay=0.0 +2025/09/25 23:05:49 - 
mmengine - WARNING - resampler.query_pos_embed is skipped since its requires_grad=False +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.attn.out_proj.bias:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.ln_q.weight:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.ln_q.bias:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.ln_kv.weight:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.ln_kv.bias:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.ln_post.weight:weight_decay=0.0 +2025/09/25 23:05:49 - mmengine - INFO - paramwise_options -- resampler.ln_post.bias:weight_decay=0.0 +2025/09/25 23:07:19 - mmengine - INFO - Load checkpoint from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a/iter_7452.pth +2025/09/25 23:07:21 - mmengine - INFO - Num train samples 36820 +2025/09/25 23:07:21 - mmengine - INFO - train example: +2025/09/25 23:07:21 - mmengine - INFO - <|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 23:07:21 - mmengine - INFO - before_train in EvaluateChatHook. 
+2025/09/25 23:07:21 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:07:25 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary. The lobules are arranged in a radial pattern, with the luminal surface facing towards the center of the lobule. The + +2025/09/25 23:07:25 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:07:29 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The whole slide image reveals a well-demarcated area of neoplastic tissue, characterized by the presence of malignant cells. These cells exhibit marked atypia, with irregular nuclear contours, hyperchromatic nuclei, and prominent nucleoli. The ne + +2025/09/25 23:07:29 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:07:30 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 23:07:30 - mmengine - INFO - [check] projector params NOT in optimizer: ['model.0.weight', 'model.0.bias', 'model.2.weight', 'model.2.bias'] +2025/09/25 23:07:30 - mmengine - INFO - [Trainable] 407,778,304 / 4,761,108,992 params (8.56%) +2025/09/25 23:07:30 - mmengine - INFO - llm.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.up_proj.lora_A.default.weight 
shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.10.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.11.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +2025/09/25 23:07:30 - mmengine - INFO - ... 
(212 more trainable tensors not shown) +2025/09/25 23:07:30 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io +2025/09/25 23:07:30 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. +2025/09/25 23:07:30 - mmengine - INFO - Checkpoints will be saved to /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b. +2025/09/25 23:07:41 - mmengine - INFO - Iter(train) [ 10/4603] base_lr: 5.8824e-07 lr: 5.8824e-07 eta: 1:18:36 time: 0.9873 data_time: 0.0085 memory: 15742 loss: 0.2188 tflops: 4.1642 tokens_per_sec: 68.8759 +2025/09/25 23:07:51 - mmengine - INFO - Iter(train) [ 20/4603] base_lr: 1.0196e-06 lr: 1.0196e-06 eta: 1:17:45 time: 1.0129 data_time: 0.0089 memory: 15742 loss: 1.3906 tflops: 3.8798 tokens_per_sec: 64.1743 +2025/09/25 23:08:02 - mmengine - INFO - Iter(train) [ 30/4603] base_lr: 1.4510e-06 lr: 1.4510e-06 eta: 1:19:33 time: 1.0542 data_time: 0.0086 memory: 15742 loss: 0.6914 tflops: 2.8095 tokens_per_sec: 46.4811 +2025/09/25 23:08:13 - mmengine - INFO - Iter(train) [ 40/4603] base_lr: 1.8824e-06 lr: 1.8824e-06 eta: 1:19:57 time: 1.0748 data_time: 0.0092 memory: 15742 loss: 0.3184 tflops: 2.3617 tokens_per_sec: 39.0766 +2025/09/25 23:08:23 - mmengine - INFO - Iter(train) [ 50/4603] base_lr: 2.3137e-06 lr: 2.3137e-06 eta: 1:19:25 time: 1.0274 data_time: 0.0091 memory: 15743 loss: 0.0674 tflops: 3.8838 tokens_per_sec: 64.2395 +2025/09/25 23:08:34 - mmengine - INFO - Iter(train) [ 60/4603] base_lr: 2.7451e-06 lr: 2.7451e-06 eta: 1:20:20 time: 0.9788 data_time: 0.0076 memory: 15742 loss: 1.5703 tflops: 5.2513 tokens_per_sec: 86.8374 +2025/09/25 23:08:45 - mmengine - INFO - Iter(train) [ 70/4603] base_lr: 3.1765e-06 lr: 3.1765e-06 eta: 1:19:54 time: 0.9778 data_time: 0.0087 memory: 16133 loss: 2.0469 
tflops: 5.3186 tokens_per_sec: 87.9482 +2025/09/25 23:08:55 - mmengine - INFO - Iter(train) [ 80/4603] base_lr: 3.6078e-06 lr: 3.6078e-06 eta: 1:19:28 time: 0.9880 data_time: 0.0090 memory: 16133 loss: 0.0820 tflops: 4.7737 tokens_per_sec: 78.9463 +2025/09/25 23:09:06 - mmengine - INFO - Iter(train) [ 90/4603] base_lr: 4.0392e-06 lr: 4.0392e-06 eta: 1:19:47 time: 1.2023 data_time: 0.0085 memory: 16133 loss: 1.2266 tflops: 4.3760 tokens_per_sec: 72.3601 +2025/09/25 23:09:16 - mmengine - INFO - Iter(train) [ 100/4603] base_lr: 4.4706e-06 lr: 4.4706e-06 eta: 1:19:15 time: 1.0407 data_time: 0.0088 memory: 16133 loss: 1.0156 tflops: 4.4738 tokens_per_sec: 73.9879 +2025/09/25 23:09:27 - mmengine - INFO - Iter(train) [ 110/4603] base_lr: 4.9020e-06 lr: 4.9020e-06 eta: 1:19:10 time: 0.9841 data_time: 0.0094 memory: 16133 loss: 1.9844 tflops: 4.8542 tokens_per_sec: 80.2768 +2025/09/25 23:09:38 - mmengine - INFO - Iter(train) [ 120/4603] base_lr: 5.3333e-06 lr: 5.3333e-06 eta: 1:19:13 time: 1.0232 data_time: 0.0086 memory: 16133 loss: 0.2754 tflops: 3.3085 tokens_per_sec: 54.7319 +2025/09/25 23:09:48 - mmengine - INFO - Iter(train) [ 130/4603] base_lr: 5.7647e-06 lr: 5.7647e-06 eta: 1:18:58 time: 1.0206 data_time: 0.0086 memory: 16133 loss: 1.5781 tflops: 4.6212 tokens_per_sec: 76.4251 +2025/09/25 23:09:59 - mmengine - INFO - Iter(train) [ 140/4603] base_lr: 6.1961e-06 lr: 6.1961e-06 eta: 1:18:58 time: 1.3175 data_time: 0.0087 memory: 16133 loss: 0.0869 tflops: 3.2124 tokens_per_sec: 53.1313 +2025/09/25 23:10:10 - mmengine - INFO - Iter(train) [ 150/4603] base_lr: 6.6275e-06 lr: 6.6275e-06 eta: 1:18:46 time: 0.9734 data_time: 0.0091 memory: 16133 loss: 1.5156 tflops: 4.7832 tokens_per_sec: 79.1045 +2025/09/25 23:10:20 - mmengine - INFO - Iter(train) [ 160/4603] base_lr: 7.0588e-06 lr: 7.0588e-06 eta: 1:18:37 time: 1.3031 data_time: 0.0090 memory: 16133 loss: 0.0952 tflops: 2.5050 tokens_per_sec: 41.4410 +2025/09/25 23:10:31 - mmengine - INFO - Iter(train) [ 170/4603] 
base_lr: 7.4902e-06 lr: 7.4902e-06 eta: 1:18:22 time: 0.9896 data_time: 0.0092 memory: 16133 loss: 1.3359 tflops: 4.4601 tokens_per_sec: 73.7649 +2025/09/25 23:10:42 - mmengine - INFO - Iter(train) [ 180/4603] base_lr: 7.9216e-06 lr: 7.9216e-06 eta: 1:18:23 time: 1.0034 data_time: 0.0089 memory: 16133 loss: 0.1011 tflops: 4.5197 tokens_per_sec: 74.7485 +2025/09/25 23:10:52 - mmengine - INFO - Iter(train) [ 190/4603] base_lr: 8.3529e-06 lr: 8.3529e-06 eta: 1:18:04 time: 1.0011 data_time: 0.0089 memory: 16133 loss: 2.0312 tflops: 5.9209 tokens_per_sec: 97.8929 +2025/09/25 23:11:04 - mmengine - INFO - Iter(train) [ 200/4603] base_lr: 8.7843e-06 lr: 8.7843e-06 eta: 1:18:13 time: 0.9855 data_time: 0.0093 memory: 16133 loss: 0.0188 tflops: 6.0149 tokens_per_sec: 99.4464 +2025/09/25 23:11:14 - mmengine - INFO - Iter(train) [ 210/4603] base_lr: 9.2157e-06 lr: 9.2157e-06 eta: 1:18:02 time: 1.0007 data_time: 0.0085 memory: 16133 loss: 0.2461 tflops: 2.8992 tokens_per_sec: 47.9660 +2025/09/25 23:11:25 - mmengine - INFO - Iter(train) [ 220/4603] base_lr: 9.6471e-06 lr: 9.6471e-06 eta: 1:17:46 time: 1.0420 data_time: 0.0086 memory: 16133 loss: 0.6055 tflops: 3.3069 tokens_per_sec: 54.7035 +2025/09/25 23:11:36 - mmengine - INFO - Iter(train) [ 230/4603] base_lr: 1.0078e-05 lr: 1.0078e-05 eta: 1:17:41 time: 0.9833 data_time: 0.0084 memory: 16133 loss: 1.4922 tflops: 4.3657 tokens_per_sec: 72.2054 +2025/09/25 23:11:46 - mmengine - INFO - Iter(train) [ 240/4603] base_lr: 1.0510e-05 lr: 1.0510e-05 eta: 1:17:30 time: 1.0031 data_time: 0.0094 memory: 16133 loss: 0.2334 tflops: 2.8923 tokens_per_sec: 47.8507 +2025/09/25 23:11:56 - mmengine - INFO - Iter(train) [ 250/4603] base_lr: 1.0941e-05 lr: 1.0941e-05 eta: 1:17:10 time: 0.9933 data_time: 0.0087 memory: 16133 loss: 0.0605 tflops: 4.1998 tokens_per_sec: 69.4638 +2025/09/25 23:12:07 - mmengine - INFO - Iter(train) [ 260/4603] base_lr: 1.1373e-05 lr: 1.1373e-05 eta: 1:17:04 time: 1.2198 data_time: 0.0086 memory: 16133 loss: 1.6797 
tflops: 4.7599 tokens_per_sec: 78.6990 +2025/09/25 23:12:18 - mmengine - INFO - Iter(train) [ 270/4603] base_lr: 1.1804e-05 lr: 1.1804e-05 eta: 1:16:53 time: 1.0024 data_time: 0.0092 memory: 16133 loss: 0.7422 tflops: 2.8340 tokens_per_sec: 46.8882 +2025/09/25 23:12:29 - mmengine - INFO - Iter(train) [ 280/4603] base_lr: 1.2235e-05 lr: 1.2235e-05 eta: 1:16:42 time: 1.2699 data_time: 0.0088 memory: 16133 loss: 0.1768 tflops: 2.2847 tokens_per_sec: 37.7983 +2025/09/25 23:12:39 - mmengine - INFO - Iter(train) [ 290/4603] base_lr: 1.2667e-05 lr: 1.2667e-05 eta: 1:16:32 time: 0.9911 data_time: 0.0086 memory: 16133 loss: 1.9375 tflops: 3.0495 tokens_per_sec: 50.4510 +2025/09/25 23:12:50 - mmengine - INFO - Iter(train) [ 300/4603] base_lr: 1.3098e-05 lr: 1.3098e-05 eta: 1:16:20 time: 1.0014 data_time: 0.0089 memory: 16133 loss: 0.0532 tflops: 4.8912 tokens_per_sec: 80.8860 +2025/09/25 23:13:00 - mmengine - INFO - Iter(train) [ 310/4603] base_lr: 1.3529e-05 lr: 1.3529e-05 eta: 1:16:04 time: 1.0029 data_time: 0.0088 memory: 16133 loss: 1.8203 tflops: 5.3667 tokens_per_sec: 88.7398 +2025/09/25 23:13:11 - mmengine - INFO - Iter(train) [ 320/4603] base_lr: 1.3961e-05 lr: 1.3961e-05 eta: 1:16:02 time: 1.2423 data_time: 0.0089 memory: 16133 loss: 3.6406 tflops: 3.1633 tokens_per_sec: 52.3238 +2025/09/25 23:13:22 - mmengine - INFO - Iter(train) [ 330/4603] base_lr: 1.4392e-05 lr: 1.4392e-05 eta: 1:15:50 time: 0.9795 data_time: 0.0085 memory: 16133 loss: 0.2158 tflops: 2.9620 tokens_per_sec: 49.0043 +2025/09/25 23:13:32 - mmengine - INFO - Iter(train) [ 340/4603] base_lr: 1.4824e-05 lr: 1.4824e-05 eta: 1:15:36 time: 0.9973 data_time: 0.0089 memory: 16133 loss: 1.4844 tflops: 3.9404 tokens_per_sec: 65.1772 +2025/09/25 23:13:43 - mmengine - INFO - Iter(train) [ 350/4603] base_lr: 1.5255e-05 lr: 1.5255e-05 eta: 1:15:29 time: 0.9842 data_time: 0.0092 memory: 16133 loss: 0.7891 tflops: 2.8863 tokens_per_sec: 47.7533 +2025/09/25 23:13:54 - mmengine - INFO - Iter(train) [ 360/4603] 
base_lr: 1.5686e-05 lr: 1.5686e-05 eta: 1:15:15 time: 1.0179 data_time: 0.0088 memory: 16133 loss: 1.7578 tflops: 5.0498 tokens_per_sec: 83.5057 +2025/09/25 23:14:04 - mmengine - INFO - Iter(train) [ 370/4603] base_lr: 1.6118e-05 lr: 1.6118e-05 eta: 1:15:03 time: 1.0027 data_time: 0.0091 memory: 16133 loss: 1.4766 tflops: 5.9720 tokens_per_sec: 98.7363 +2025/09/25 23:14:15 - mmengine - INFO - Iter(train) [ 380/4603] base_lr: 1.6549e-05 lr: 1.6549e-05 eta: 1:14:55 time: 0.9891 data_time: 0.0092 memory: 16133 loss: 0.0253 tflops: 3.3001 tokens_per_sec: 54.5935 +2025/09/25 23:14:26 - mmengine - INFO - Iter(train) [ 390/4603] base_lr: 1.6980e-05 lr: 1.6980e-05 eta: 1:14:47 time: 1.0506 data_time: 0.0096 memory: 16133 loss: 0.0293 tflops: 3.6827 tokens_per_sec: 60.9157 +2025/09/25 23:14:37 - mmengine - INFO - Iter(train) [ 400/4603] base_lr: 1.7412e-05 lr: 1.7412e-05 eta: 1:14:37 time: 1.0348 data_time: 0.0087 memory: 16133 loss: 0.1060 tflops: 2.9789 tokens_per_sec: 49.2828 +2025/09/25 23:14:47 - mmengine - INFO - Iter(train) [ 410/4603] base_lr: 1.7843e-05 lr: 1.7843e-05 eta: 1:14:23 time: 0.9999 data_time: 0.0090 memory: 16133 loss: 0.8438 tflops: 3.0829 tokens_per_sec: 51.0030 +2025/09/25 23:14:58 - mmengine - INFO - Iter(train) [ 420/4603] base_lr: 1.8275e-05 lr: 1.8275e-05 eta: 1:14:13 time: 1.0185 data_time: 0.0089 memory: 16133 loss: 0.0356 tflops: 4.2149 tokens_per_sec: 69.7122 +2025/09/25 23:15:08 - mmengine - INFO - Iter(train) [ 430/4603] base_lr: 1.8706e-05 lr: 1.8706e-05 eta: 1:14:01 time: 1.2031 data_time: 0.0090 memory: 16133 loss: 1.4375 tflops: 3.6686 tokens_per_sec: 60.6746 +2025/09/25 23:15:19 - mmengine - INFO - Iter(train) [ 440/4603] base_lr: 1.9137e-05 lr: 1.9137e-05 eta: 1:13:55 time: 0.9838 data_time: 0.0087 memory: 16133 loss: 0.0391 tflops: 4.2405 tokens_per_sec: 70.1372 +2025/09/25 23:15:30 - mmengine - INFO - Iter(train) [ 450/4603] base_lr: 1.9569e-05 lr: 1.9569e-05 eta: 1:13:46 time: 1.0000 data_time: 0.0086 memory: 16133 loss: 0.3164 
tflops: 2.9617 tokens_per_sec: 48.9994 +2025/09/25 23:15:41 - mmengine - INFO - Iter(train) [ 460/4603] base_lr: 2.0000e-05 lr: 2.0000e-05 eta: 1:13:34 time: 1.2424 data_time: 0.0090 memory: 16133 loss: 0.0366 tflops: 3.6501 tokens_per_sec: 60.3670 +2025/09/25 23:15:52 - mmengine - INFO - Iter(train) [ 470/4603] base_lr: 2.0000e-05 lr: 2.0000e-05 eta: 1:13:26 time: 1.0127 data_time: 0.0087 memory: 16133 loss: 1.4062 tflops: 5.1353 tokens_per_sec: 84.9183 +2025/09/25 23:16:02 - mmengine - INFO - Iter(train) [ 480/4603] base_lr: 2.0000e-05 lr: 2.0000e-05 eta: 1:13:15 time: 0.9958 data_time: 0.0092 memory: 16133 loss: 3.4375 tflops: 4.2501 tokens_per_sec: 70.2946 +2025/09/25 23:16:12 - mmengine - INFO - Iter(train) [ 490/4603] base_lr: 1.9999e-05 lr: 1.9999e-05 eta: 1:13:00 time: 0.9862 data_time: 0.0092 memory: 16133 loss: 0.0250 tflops: 4.3530 tokens_per_sec: 71.9953 +2025/09/25 23:16:24 - mmengine - INFO - Iter(train) [ 500/4603] base_lr: 1.9999e-05 lr: 1.9999e-05 eta: 1:12:55 time: 1.0194 data_time: 0.0088 memory: 16133 loss: 0.0376 tflops: 3.8547 tokens_per_sec: 63.7599 +2025/09/25 23:16:34 - mmengine - INFO - Iter(train) [ 510/4603] base_lr: 1.9998e-05 lr: 1.9998e-05 eta: 1:12:40 time: 0.9887 data_time: 0.0099 memory: 16133 loss: 0.0178 tflops: 4.0358 tokens_per_sec: 66.7541 +2025/09/25 23:16:36 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 23:16:36 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:16:40 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. The lobules are well-defined and show a clear boundary between the tumor and the surrounding stroma. 
The lobules are composed of a cohesive group of tumor cells with a relatively uniform appearance.<|im_end|> + +2025/09/25 23:16:40 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:17:27 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a well-differentiated papillary thyroid carcinoma with a follicular component. The tumor cells exhibit papillary architecture with nuclear features indicative of papillary thyroid carcinoma, including nuclear grooves and nuclear inclusions. The follicular component is characterized by a monomorphic population of follicular cells with a delicate fibrovascular stroma. The tumor cells show a high degree of nuclear pleomorphism and nuclear atypia, with occasional mitotic figures. The tumor is infiltrative, with invasion into the surrounding thyroid tissue and adjacent structures. The tumor cells are positive for TTF-1, thyroglobulin, and galectin-3, and negative for CD10, CD5, and CD20. The tumor is encapsulated, with a well-defined capsule. The tumor cells are negative for estrogen receptor (ER), progesterone receptor (PR), and HER2/neu. The tumor is negative for p53 and p16, and positive for p27. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. 
The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. The tumor is negative for p16, p16, and p16. + +2025/09/25 23:17:27 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:17:28 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +B) Moderately differentiated squamous cell carcinoma<|im_end|> + +2025/09/25 23:18:21 - mmengine - INFO - Iter(train) [ 520/4603] base_lr: 1.9998e-05 lr: 1.9998e-05 eta: 1:25:06 time: 0.9824 data_time: 0.0090 memory: 16133 loss: 1.4453 tflops: 4.4314 tokens_per_sec: 73.2908 +2025/09/25 23:18:32 - mmengine - INFO - Iter(train) [ 530/4603] base_lr: 1.9997e-05 lr: 1.9997e-05 eta: 1:24:44 time: 0.9557 data_time: 0.0072 memory: 16133 loss: 1.9062 tflops: 4.3649 tokens_per_sec: 72.1951 +2025/09/25 23:18:42 - mmengine - INFO - Iter(train) [ 540/4603] base_lr: 1.9996e-05 lr: 1.9996e-05 eta: 1:24:12 time: 0.9996 data_time: 0.0095 memory: 16133 loss: 0.0474 tflops: 4.1127 tokens_per_sec: 68.0245 +2025/09/25 23:18:53 - mmengine - INFO - Iter(train) [ 550/4603] base_lr: 1.9995e-05 lr: 1.9995e-05 eta: 1:23:46 time: 1.0192 data_time: 0.0098 memory: 16133 loss: 0.7344 tflops: 5.1622 tokens_per_sec: 85.3608 +2025/09/25 23:19:04 - mmengine - INFO - Iter(train) [ 
560/4603] base_lr: 1.9994e-05 lr: 1.9994e-05 eta: 1:23:25 time: 1.0077 data_time: 0.0085 memory: 16133 loss: 1.4609 tflops: 5.5815 tokens_per_sec: 92.2883 +2025/09/25 23:19:14 - mmengine - INFO - Iter(train) [ 570/4603] base_lr: 1.9992e-05 lr: 1.9992e-05 eta: 1:22:57 time: 1.0561 data_time: 0.0090 memory: 16133 loss: 1.1953 tflops: 4.2367 tokens_per_sec: 70.0691 +2025/09/25 23:19:25 - mmengine - INFO - Iter(train) [ 580/4603] base_lr: 1.9991e-05 lr: 1.9991e-05 eta: 1:22:37 time: 1.3263 data_time: 0.0090 memory: 16133 loss: 0.0354 tflops: 3.0998 tokens_per_sec: 51.2715 +2025/09/25 23:19:37 - mmengine - INFO - Iter(train) [ 590/4603] base_lr: 1.9989e-05 lr: 1.9989e-05 eta: 1:22:19 time: 1.0296 data_time: 0.0095 memory: 16133 loss: 0.1206 tflops: 3.7579 tokens_per_sec: 62.1584 +2025/09/25 23:19:47 - mmengine - INFO - Iter(train) [ 600/4603] base_lr: 1.9988e-05 lr: 1.9988e-05 eta: 1:21:51 time: 0.9971 data_time: 0.0090 memory: 16133 loss: 1.0312 tflops: 4.6087 tokens_per_sec: 76.2198 +2025/09/25 23:19:57 - mmengine - INFO - Iter(train) [ 610/4603] base_lr: 1.9986e-05 lr: 1.9986e-05 eta: 1:21:28 time: 0.9819 data_time: 0.0083 memory: 16133 loss: 1.5547 tflops: 3.9403 tokens_per_sec: 65.1768 +2025/09/25 23:20:08 - mmengine - INFO - Iter(train) [ 620/4603] base_lr: 1.9984e-05 lr: 1.9984e-05 eta: 1:21:08 time: 1.0001 data_time: 0.0097 memory: 16133 loss: 0.1069 tflops: 3.2035 tokens_per_sec: 52.9971 +2025/09/25 23:20:18 - mmengine - INFO - Iter(train) [ 630/4603] base_lr: 1.9982e-05 lr: 1.9982e-05 eta: 1:20:42 time: 0.9979 data_time: 0.0092 memory: 16133 loss: 1.0234 tflops: 2.8469 tokens_per_sec: 47.1009 +2025/09/25 23:20:30 - mmengine - INFO - Iter(train) [ 640/4603] base_lr: 1.9979e-05 lr: 1.9979e-05 eta: 1:20:24 time: 1.2734 data_time: 0.0088 memory: 16133 loss: 1.2188 tflops: 4.2268 tokens_per_sec: 69.8913 +2025/09/25 23:20:40 - mmengine - INFO - Iter(train) [ 650/4603] base_lr: 1.9977e-05 lr: 1.9977e-05 eta: 1:20:03 time: 0.9979 data_time: 0.0087 memory: 16133 loss: 
0.0199 tflops: 3.7561 tokens_per_sec: 62.1309 +2025/09/25 23:20:51 - mmengine - INFO - Iter(train) [ 660/4603] base_lr: 1.9974e-05 lr: 1.9974e-05 eta: 1:19:39 time: 0.9894 data_time: 0.0093 memory: 16133 loss: 0.2559 tflops: 3.3604 tokens_per_sec: 55.5913 +2025/09/25 23:21:01 - mmengine - INFO - Iter(train) [ 670/4603] base_lr: 1.9972e-05 lr: 1.9972e-05 eta: 1:19:18 time: 1.0174 data_time: 0.0089 memory: 16133 loss: 0.0151 tflops: 3.8623 tokens_per_sec: 63.8855 +2025/09/25 23:21:12 - mmengine - INFO - Iter(train) [ 680/4603] base_lr: 1.9969e-05 lr: 1.9969e-05 eta: 1:18:57 time: 0.9885 data_time: 0.0087 memory: 16133 loss: 1.7422 tflops: 5.0774 tokens_per_sec: 83.9643 +2025/09/25 23:21:22 - mmengine - INFO - Iter(train) [ 690/4603] base_lr: 1.9966e-05 lr: 1.9966e-05 eta: 1:18:34 time: 0.9927 data_time: 0.0086 memory: 16133 loss: 0.0022 tflops: 4.4463 tokens_per_sec: 73.5371 +2025/09/25 23:21:32 - mmengine - INFO - Iter(train) [ 700/4603] base_lr: 1.9963e-05 lr: 1.9963e-05 eta: 1:18:13 time: 1.0041 data_time: 0.0088 memory: 16133 loss: 0.2656 tflops: 2.5281 tokens_per_sec: 41.8300 +2025/09/25 23:21:43 - mmengine - INFO - Iter(train) [ 710/4603] base_lr: 1.9960e-05 lr: 1.9960e-05 eta: 1:17:55 time: 1.0117 data_time: 0.0094 memory: 16133 loss: 0.0811 tflops: 3.3459 tokens_per_sec: 55.3506 +2025/09/25 23:21:54 - mmengine - INFO - Iter(train) [ 720/4603] base_lr: 1.9957e-05 lr: 1.9957e-05 eta: 1:17:34 time: 1.0025 data_time: 0.0094 memory: 16133 loss: 0.1963 tflops: 2.8940 tokens_per_sec: 47.8797 +2025/09/25 23:22:04 - mmengine - INFO - Iter(train) [ 730/4603] base_lr: 1.9953e-05 lr: 1.9953e-05 eta: 1:17:15 time: 1.0114 data_time: 0.0087 memory: 16133 loss: 0.1973 tflops: 4.4237 tokens_per_sec: 73.1628 +2025/09/25 23:22:15 - mmengine - INFO - Iter(train) [ 740/4603] base_lr: 1.9950e-05 lr: 1.9950e-05 eta: 1:16:56 time: 1.0097 data_time: 0.0092 memory: 16133 loss: 0.0679 tflops: 3.2927 tokens_per_sec: 54.4707 +2025/09/25 23:22:26 - mmengine - INFO - Iter(train) [ 
750/4603] base_lr: 1.9946e-05 lr: 1.9946e-05 eta: 1:16:38 time: 1.0112 data_time: 0.0088 memory: 16133 loss: 0.0586 tflops: 3.8262 tokens_per_sec: 63.2887 +2025/09/25 23:22:36 - mmengine - INFO - Iter(train) [ 760/4603] base_lr: 1.9942e-05 lr: 1.9942e-05 eta: 1:16:18 time: 0.9932 data_time: 0.0088 memory: 16133 loss: 1.3281 tflops: 4.9927 tokens_per_sec: 82.5639 +2025/09/25 23:22:46 - mmengine - INFO - Iter(train) [ 770/4603] base_lr: 1.9938e-05 lr: 1.9938e-05 eta: 1:15:59 time: 0.9754 data_time: 0.0086 memory: 16133 loss: 0.1357 tflops: 2.9745 tokens_per_sec: 49.2113 +2025/09/25 23:22:57 - mmengine - INFO - Iter(train) [ 780/4603] base_lr: 1.9934e-05 lr: 1.9934e-05 eta: 1:15:39 time: 0.9906 data_time: 0.0098 memory: 16133 loss: 0.0437 tflops: 4.0282 tokens_per_sec: 66.6275 +2025/09/25 23:23:07 - mmengine - INFO - Iter(train) [ 790/4603] base_lr: 1.9930e-05 lr: 1.9930e-05 eta: 1:15:22 time: 1.2006 data_time: 0.0095 memory: 16133 loss: 0.0466 tflops: 3.7772 tokens_per_sec: 62.4689 +2025/09/25 23:23:18 - mmengine - INFO - Iter(train) [ 800/4603] base_lr: 1.9926e-05 lr: 1.9926e-05 eta: 1:15:04 time: 0.9970 data_time: 0.0090 memory: 16133 loss: 0.4609 tflops: 3.2132 tokens_per_sec: 53.1575 +2025/09/25 23:23:28 - mmengine - INFO - Iter(train) [ 810/4603] base_lr: 1.9922e-05 lr: 1.9922e-05 eta: 1:14:44 time: 1.0055 data_time: 0.0091 memory: 16133 loss: 0.0148 tflops: 5.8949 tokens_per_sec: 97.4622 +2025/09/25 23:23:39 - mmengine - INFO - Iter(train) [ 820/4603] base_lr: 1.9917e-05 lr: 1.9917e-05 eta: 1:14:28 time: 1.2570 data_time: 0.0086 memory: 16133 loss: 0.0170 tflops: 2.9819 tokens_per_sec: 49.3247 +2025/09/25 23:23:49 - mmengine - INFO - Iter(train) [ 830/4603] base_lr: 1.9912e-05 lr: 1.9912e-05 eta: 1:14:09 time: 0.9821 data_time: 0.0088 memory: 16133 loss: 0.0085 tflops: 4.8640 tokens_per_sec: 80.4393 +2025/09/25 23:24:00 - mmengine - INFO - Iter(train) [ 840/4603] base_lr: 1.9907e-05 lr: 1.9907e-05 eta: 1:13:53 time: 0.9948 data_time: 0.0091 memory: 16133 loss: 
0.0107 tflops: 4.4369 tokens_per_sec: 73.3824 +2025/09/25 23:24:11 - mmengine - INFO - Iter(train) [ 850/4603] base_lr: 1.9903e-05 lr: 1.9903e-05 eta: 1:13:37 time: 0.9983 data_time: 0.0086 memory: 16133 loss: 1.2734 tflops: 4.7247 tokens_per_sec: 78.1366 +2025/09/25 23:24:22 - mmengine - INFO - Iter(train) [ 860/4603] base_lr: 1.9897e-05 lr: 1.9897e-05 eta: 1:13:20 time: 1.0081 data_time: 0.0088 memory: 16133 loss: 1.5859 tflops: 6.1804 tokens_per_sec: 102.1764 +2025/09/25 23:24:32 - mmengine - INFO - Iter(train) [ 870/4603] base_lr: 1.9892e-05 lr: 1.9892e-05 eta: 1:13:05 time: 0.9986 data_time: 0.0089 memory: 16133 loss: 0.5312 tflops: 2.9053 tokens_per_sec: 48.0672 +2025/09/25 23:24:43 - mmengine - INFO - Iter(train) [ 880/4603] base_lr: 1.9887e-05 lr: 1.9887e-05 eta: 1:12:49 time: 1.2483 data_time: 0.0091 memory: 16133 loss: 1.9766 tflops: 2.6149 tokens_per_sec: 43.2585 +2025/09/25 23:24:54 - mmengine - INFO - Iter(train) [ 890/4603] base_lr: 1.9882e-05 lr: 1.9882e-05 eta: 1:12:34 time: 1.3625 data_time: 0.0087 memory: 16133 loss: 0.0223 tflops: 2.6622 tokens_per_sec: 44.0374 +2025/09/25 23:25:05 - mmengine - INFO - Iter(train) [ 900/4603] base_lr: 1.9876e-05 lr: 1.9876e-05 eta: 1:12:17 time: 0.9669 data_time: 0.0088 memory: 16133 loss: 1.6094 tflops: 5.3161 tokens_per_sec: 87.9093 +2025/09/25 23:25:16 - mmengine - INFO - Iter(train) [ 910/4603] base_lr: 1.9870e-05 lr: 1.9870e-05 eta: 1:12:02 time: 1.0000 data_time: 0.0094 memory: 16133 loss: 1.3906 tflops: 4.5349 tokens_per_sec: 75.0008 +2025/09/25 23:25:26 - mmengine - INFO - Iter(train) [ 920/4603] base_lr: 1.9864e-05 lr: 1.9864e-05 eta: 1:11:47 time: 1.0256 data_time: 0.0091 memory: 16133 loss: 1.3828 tflops: 4.3037 tokens_per_sec: 71.1793 +2025/09/25 23:25:37 - mmengine - INFO - Iter(train) [ 930/4603] base_lr: 1.9858e-05 lr: 1.9858e-05 eta: 1:11:32 time: 1.2585 data_time: 0.0092 memory: 16133 loss: 2.1875 tflops: 3.7478 tokens_per_sec: 61.9802 +2025/09/25 23:25:48 - mmengine - INFO - Iter(train) [ 
940/4603] base_lr: 1.9852e-05 lr: 1.9852e-05 eta: 1:11:17 time: 0.9903 data_time: 0.0091 memory: 16133 loss: 1.1406 tflops: 2.6853 tokens_per_sec: 44.4292 +2025/09/25 23:25:59 - mmengine - INFO - Iter(train) [ 950/4603] base_lr: 1.9846e-05 lr: 1.9846e-05 eta: 1:11:00 time: 0.9891 data_time: 0.0088 memory: 16133 loss: 1.6406 tflops: 4.9522 tokens_per_sec: 81.8947 +2025/09/25 23:26:10 - mmengine - INFO - Iter(train) [ 960/4603] base_lr: 1.9840e-05 lr: 1.9840e-05 eta: 1:10:46 time: 1.3839 data_time: 0.0095 memory: 16133 loss: 0.0248 tflops: 3.9332 tokens_per_sec: 65.0357 +2025/09/25 23:26:20 - mmengine - INFO - Iter(train) [ 970/4603] base_lr: 1.9833e-05 lr: 1.9833e-05 eta: 1:10:31 time: 1.0031 data_time: 0.0095 memory: 16133 loss: 0.9883 tflops: 5.0639 tokens_per_sec: 83.7393 +2025/09/25 23:26:31 - mmengine - INFO - Iter(train) [ 980/4603] base_lr: 1.9827e-05 lr: 1.9827e-05 eta: 1:10:16 time: 0.9879 data_time: 0.0093 memory: 16133 loss: 0.0874 tflops: 3.9166 tokens_per_sec: 64.7835 +2025/09/25 23:26:42 - mmengine - INFO - Iter(train) [ 990/4603] base_lr: 1.9820e-05 lr: 1.9820e-05 eta: 1:10:00 time: 0.9827 data_time: 0.0091 memory: 16133 loss: 1.1562 tflops: 5.1692 tokens_per_sec: 85.4809 +2025/09/25 23:26:52 - mmengine - INFO - Exp name: temp_config_stage_2b_20250925_230352 +2025/09/25 23:26:52 - mmengine - INFO - Iter(train) [1000/4603] base_lr: 1.9813e-05 lr: 1.9813e-05 eta: 1:09:45 time: 1.0026 data_time: 0.0087 memory: 16133 loss: 1.3359 tflops: 5.0667 tokens_per_sec: 83.7860 +2025/09/25 23:27:03 - mmengine - INFO - Iter(train) [1010/4603] base_lr: 1.9806e-05 lr: 1.9806e-05 eta: 1:09:30 time: 1.3063 data_time: 0.0091 memory: 16133 loss: 1.6641 tflops: 3.5180 tokens_per_sec: 58.1814 +2025/09/25 23:27:13 - mmengine - INFO - Iter(train) [1020/4603] base_lr: 1.9799e-05 lr: 1.9799e-05 eta: 1:09:15 time: 0.9959 data_time: 0.0091 memory: 16133 loss: 0.0640 tflops: 3.8850 tokens_per_sec: 64.2611 +2025/09/25 23:27:18 - mmengine - INFO - after_train_iter in 
EvaluateChatHook. +2025/09/25 23:27:18 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:27:24 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. This pattern is characterized by the formation of distinct lobules, which are separated by fibrous tissue. The lobules are composed of a dense cluster of tumor cells, creating a well-defined structure. This organization is a key feature of the tumor and contributes to its overall appearance.<|im_end|> + +2025/09/25 23:27:24 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:27:36 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a high-grade squamous cell carcinoma with significant invasion into the surrounding tissue. The tumor exhibits high nuclear grade, high mitotic activity, and a high proliferation index, all indicative of aggressive behavior. The tumor is characterized by a high degree of pleomorphism, with large, irregular nuclei and prominent nucleoli. The presence of keratinization and keratin pearls is noted, suggesting a well-differentiated squamous cell carcinoma. The tumor cells show a high degree of pleomorphism, with large, irregular nuclei and prominent nucleoli. The tumor is infiltrative, with a high mitotic rate and a high proliferation index, indicating aggressive growth. 
The tumor is well-differentiated, with keratinization and keratin pearls, and shows significant invasion into the surrounding tissue.<|im_end|> + +2025/09/25 23:27:36 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:27:37 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/25 23:28:20 - mmengine - INFO - Iter(train) [1030/4603] base_lr: 1.9792e-05 lr: 1.9792e-05 eta: 1:12:15 time: 0.9905 data_time: 0.0092 memory: 16133 loss: 1.3828 tflops: 5.4953 tokens_per_sec: 90.8653 +2025/09/25 23:28:31 - mmengine - INFO - Iter(train) [1040/4603] base_lr: 1.9785e-05 lr: 1.9785e-05 eta: 1:11:57 time: 0.9842 data_time: 0.0090 memory: 16133 loss: 0.0157 tflops: 2.9478 tokens_per_sec: 48.7695 +2025/09/25 23:28:42 - mmengine - INFO - Iter(train) [1050/4603] base_lr: 1.9777e-05 lr: 1.9777e-05 eta: 1:11:41 time: 1.0177 data_time: 0.0089 memory: 16133 loss: 0.0923 tflops: 4.7532 tokens_per_sec: 78.6060 +2025/09/25 23:28:52 - mmengine - INFO - Iter(train) [1060/4603] base_lr: 1.9769e-05 lr: 1.9769e-05 eta: 1:11:24 time: 1.3480 data_time: 0.0087 memory: 16133 loss: 0.5156 tflops: 2.1523 tokens_per_sec: 35.6088 +2025/09/25 23:29:03 - mmengine - INFO - Iter(train) [1070/4603] base_lr: 1.9762e-05 lr: 1.9762e-05 eta: 1:11:06 time: 1.0147 data_time: 0.0092 memory: 16133 loss: 1.4062 tflops: 3.9920 tokens_per_sec: 66.0281 +2025/09/25 23:29:14 - mmengine - INFO - Iter(train) [1080/4603] base_lr: 1.9754e-05 lr: 1.9754e-05 eta: 1:10:50 time: 1.0169 data_time: 0.0092 memory: 16133 
loss: 0.5195 tflops: 3.8048 tokens_per_sec: 62.9344 +2025/09/25 23:29:25 - mmengine - INFO - Iter(train) [1090/4603] base_lr: 1.9746e-05 lr: 1.9746e-05 eta: 1:10:34 time: 1.2444 data_time: 0.0094 memory: 16133 loss: 0.0106 tflops: 3.2552 tokens_per_sec: 53.8422 +2025/09/25 23:29:35 - mmengine - INFO - Iter(train) [1100/4603] base_lr: 1.9738e-05 lr: 1.9738e-05 eta: 1:10:18 time: 0.9913 data_time: 0.0091 memory: 16133 loss: 1.2188 tflops: 4.5748 tokens_per_sec: 75.6600 +2025/09/25 23:29:46 - mmengine - INFO - Iter(train) [1110/4603] base_lr: 1.9729e-05 lr: 1.9729e-05 eta: 1:10:01 time: 0.9922 data_time: 0.0093 memory: 16133 loss: 0.1641 tflops: 4.2047 tokens_per_sec: 69.5449 +2025/09/25 23:29:56 - mmengine - INFO - Iter(train) [1120/4603] base_lr: 1.9721e-05 lr: 1.9721e-05 eta: 1:09:44 time: 0.9860 data_time: 0.0087 memory: 16133 loss: 0.6641 tflops: 2.8198 tokens_per_sec: 46.6532 +2025/09/25 23:30:07 - mmengine - INFO - Iter(train) [1130/4603] base_lr: 1.9713e-05 lr: 1.9713e-05 eta: 1:09:27 time: 0.9946 data_time: 0.0091 memory: 16133 loss: 1.1719 tflops: 4.8029 tokens_per_sec: 79.4284 +2025/09/25 23:30:17 - mmengine - INFO - Iter(train) [1140/4603] base_lr: 1.9704e-05 lr: 1.9704e-05 eta: 1:09:12 time: 1.2457 data_time: 0.0092 memory: 16133 loss: 1.3203 tflops: 3.8346 tokens_per_sec: 63.4158 +2025/09/25 23:30:28 - mmengine - INFO - Iter(train) [1150/4603] base_lr: 1.9695e-05 lr: 1.9695e-05 eta: 1:08:56 time: 1.3675 data_time: 0.0089 memory: 16133 loss: 1.1562 tflops: 3.6704 tokens_per_sec: 60.6963 +2025/09/25 23:30:39 - mmengine - INFO - Iter(train) [1160/4603] base_lr: 1.9686e-05 lr: 1.9686e-05 eta: 1:08:41 time: 1.3190 data_time: 0.0089 memory: 16133 loss: 1.7734 tflops: 3.3922 tokens_per_sec: 56.1031 +2025/09/25 23:30:50 - mmengine - INFO - Iter(train) [1170/4603] base_lr: 1.9677e-05 lr: 1.9677e-05 eta: 1:08:25 time: 1.0089 data_time: 0.0093 memory: 16133 loss: 1.4609 tflops: 4.0749 tokens_per_sec: 67.3986 +2025/09/25 23:31:00 - mmengine - INFO - Iter(train) 
[1180/4603] base_lr: 1.9668e-05 lr: 1.9668e-05 eta: 1:08:09 time: 1.3686 data_time: 0.0095 memory: 16133 loss: 0.0654 tflops: 2.8271 tokens_per_sec: 46.7630 +2025/09/25 23:31:11 - mmengine - INFO - Iter(train) [1190/4603] base_lr: 1.9659e-05 lr: 1.9659e-05 eta: 1:07:53 time: 0.9937 data_time: 0.0096 memory: 16133 loss: 0.0967 tflops: 4.0763 tokens_per_sec: 67.4235 +2025/09/25 23:31:22 - mmengine - INFO - Iter(train) [1200/4603] base_lr: 1.9650e-05 lr: 1.9650e-05 eta: 1:07:38 time: 1.3382 data_time: 0.0100 memory: 16133 loss: 0.1436 tflops: 3.1174 tokens_per_sec: 51.5615 +2025/09/25 23:31:32 - mmengine - INFO - Iter(train) [1210/4603] base_lr: 1.9640e-05 lr: 1.9640e-05 eta: 1:07:23 time: 1.2687 data_time: 0.0091 memory: 16133 loss: 0.0649 tflops: 3.8606 tokens_per_sec: 63.8434 +2025/09/25 23:31:43 - mmengine - INFO - Iter(train) [1220/4603] base_lr: 1.9631e-05 lr: 1.9631e-05 eta: 1:07:08 time: 1.0045 data_time: 0.0095 memory: 16133 loss: 0.1416 tflops: 3.4302 tokens_per_sec: 56.7441 +2025/09/25 23:31:54 - mmengine - INFO - Iter(train) [1230/4603] base_lr: 1.9621e-05 lr: 1.9621e-05 eta: 1:06:53 time: 1.3086 data_time: 0.0087 memory: 16133 loss: 0.2695 tflops: 2.3095 tokens_per_sec: 38.2078 +2025/09/25 23:32:05 - mmengine - INFO - Iter(train) [1240/4603] base_lr: 1.9611e-05 lr: 1.9611e-05 eta: 1:06:38 time: 1.0026 data_time: 0.0093 memory: 16133 loss: 2.2031 tflops: 3.9193 tokens_per_sec: 64.8285 +2025/09/25 23:32:15 - mmengine - INFO - Iter(train) [1250/4603] base_lr: 1.9601e-05 lr: 1.9601e-05 eta: 1:06:22 time: 0.9912 data_time: 0.0085 memory: 16133 loss: 0.1797 tflops: 3.1102 tokens_per_sec: 51.4552 +2025/09/25 23:32:26 - mmengine - INFO - Iter(train) [1260/4603] base_lr: 1.9591e-05 lr: 1.9591e-05 eta: 1:06:07 time: 1.0249 data_time: 0.0088 memory: 16133 loss: 1.7344 tflops: 4.7789 tokens_per_sec: 79.0303 +2025/09/25 23:32:37 - mmengine - INFO - Iter(train) [1270/4603] base_lr: 1.9581e-05 lr: 1.9581e-05 eta: 1:05:54 time: 1.0365 data_time: 0.0089 memory: 16133 
loss: 0.2139 tflops: 2.5658 tokens_per_sec: 42.4520 +2025/09/25 23:32:48 - mmengine - INFO - Iter(train) [1280/4603] base_lr: 1.9570e-05 lr: 1.9570e-05 eta: 1:05:40 time: 1.3490 data_time: 0.0090 memory: 16133 loss: 0.1099 tflops: 3.3616 tokens_per_sec: 55.5952 +2025/09/25 23:32:59 - mmengine - INFO - Iter(train) [1290/4603] base_lr: 1.9560e-05 lr: 1.9560e-05 eta: 1:05:25 time: 1.0035 data_time: 0.0076 memory: 16133 loss: 1.0703 tflops: 3.9160 tokens_per_sec: 64.7731 +2025/09/25 23:33:10 - mmengine - INFO - Iter(train) [1300/4603] base_lr: 1.9549e-05 lr: 1.9549e-05 eta: 1:05:11 time: 0.9851 data_time: 0.0091 memory: 16133 loss: 1.4141 tflops: 5.0951 tokens_per_sec: 84.2561 +2025/09/25 23:33:20 - mmengine - INFO - Iter(train) [1310/4603] base_lr: 1.9539e-05 lr: 1.9539e-05 eta: 1:04:55 time: 0.9716 data_time: 0.0093 memory: 16133 loss: 0.1289 tflops: 4.6676 tokens_per_sec: 77.1949 +2025/09/25 23:33:31 - mmengine - INFO - Iter(train) [1320/4603] base_lr: 1.9528e-05 lr: 1.9528e-05 eta: 1:04:41 time: 1.0203 data_time: 0.0090 memory: 16133 loss: 0.2041 tflops: 2.7842 tokens_per_sec: 46.0643 +2025/09/25 23:33:42 - mmengine - INFO - Iter(train) [1330/4603] base_lr: 1.9517e-05 lr: 1.9517e-05 eta: 1:04:27 time: 1.2515 data_time: 0.0088 memory: 16133 loss: 0.0684 tflops: 3.6718 tokens_per_sec: 60.7255 +2025/09/25 23:33:52 - mmengine - INFO - Iter(train) [1340/4603] base_lr: 1.9506e-05 lr: 1.9506e-05 eta: 1:04:12 time: 1.0086 data_time: 0.0093 memory: 16133 loss: 0.1904 tflops: 2.8165 tokens_per_sec: 46.5985 +2025/09/25 23:34:04 - mmengine - INFO - Iter(train) [1350/4603] base_lr: 1.9494e-05 lr: 1.9494e-05 eta: 1:03:58 time: 1.0141 data_time: 0.0087 memory: 16133 loss: 0.0525 tflops: 4.7107 tokens_per_sec: 77.9043 +2025/09/25 23:34:15 - mmengine - INFO - Iter(train) [1360/4603] base_lr: 1.9483e-05 lr: 1.9483e-05 eta: 1:03:45 time: 1.0394 data_time: 0.0088 memory: 16133 loss: 0.2061 tflops: 2.6168 tokens_per_sec: 43.2948 +2025/09/25 23:34:25 - mmengine - INFO - Iter(train) 
[1370/4603] base_lr: 1.9472e-05 lr: 1.9472e-05 eta: 1:03:30 time: 1.0166 data_time: 0.0092 memory: 16133 loss: 1.3516 tflops: 4.8778 tokens_per_sec: 80.6631 +2025/09/25 23:34:37 - mmengine - INFO - Iter(train) [1380/4603] base_lr: 1.9460e-05 lr: 1.9460e-05 eta: 1:03:17 time: 1.2238 data_time: 0.0094 memory: 16133 loss: 0.0549 tflops: 3.3100 tokens_per_sec: 54.7485 +2025/09/25 23:34:47 - mmengine - INFO - Iter(train) [1390/4603] base_lr: 1.9448e-05 lr: 1.9448e-05 eta: 1:03:02 time: 1.0014 data_time: 0.0091 memory: 16133 loss: 1.7500 tflops: 3.9847 tokens_per_sec: 65.9090 +2025/09/25 23:34:58 - mmengine - INFO - Iter(train) [1400/4603] base_lr: 1.9437e-05 lr: 1.9437e-05 eta: 1:02:48 time: 1.3178 data_time: 0.0088 memory: 16133 loss: 1.5078 tflops: 5.2333 tokens_per_sec: 86.5065 +2025/09/25 23:35:08 - mmengine - INFO - Iter(train) [1410/4603] base_lr: 1.9425e-05 lr: 1.9425e-05 eta: 1:02:34 time: 1.0276 data_time: 0.0088 memory: 16133 loss: 0.1943 tflops: 4.5307 tokens_per_sec: 74.9291 +2025/09/25 23:35:19 - mmengine - INFO - Iter(train) [1420/4603] base_lr: 1.9413e-05 lr: 1.9413e-05 eta: 1:02:20 time: 0.9853 data_time: 0.0087 memory: 16133 loss: 0.0586 tflops: 5.9543 tokens_per_sec: 98.4458 +2025/09/25 23:35:30 - mmengine - INFO - Iter(train) [1430/4603] base_lr: 1.9400e-05 lr: 1.9400e-05 eta: 1:02:06 time: 1.1812 data_time: 0.0093 memory: 16133 loss: 1.3203 tflops: 4.2491 tokens_per_sec: 70.2670 +2025/09/25 23:35:41 - mmengine - INFO - Iter(train) [1440/4603] base_lr: 1.9388e-05 lr: 1.9388e-05 eta: 1:01:52 time: 1.0215 data_time: 0.0086 memory: 16133 loss: 0.2217 tflops: 2.8402 tokens_per_sec: 46.9902 +2025/09/25 23:35:51 - mmengine - INFO - Iter(train) [1450/4603] base_lr: 1.9376e-05 lr: 1.9376e-05 eta: 1:01:38 time: 1.0078 data_time: 0.0085 memory: 16133 loss: 0.1396 tflops: 4.4396 tokens_per_sec: 73.4250 +2025/09/25 23:36:02 - mmengine - INFO - Iter(train) [1460/4603] base_lr: 1.9363e-05 lr: 1.9363e-05 eta: 1:01:23 time: 0.9995 data_time: 0.0092 memory: 16133 
loss: 0.2061 tflops: 2.7212 tokens_per_sec: 45.0230 +2025/09/25 23:36:13 - mmengine - INFO - Iter(train) [1470/4603] base_lr: 1.9350e-05 lr: 1.9350e-05 eta: 1:01:10 time: 0.9841 data_time: 0.0089 memory: 16133 loss: 1.5234 tflops: 4.4852 tokens_per_sec: 74.1797 +2025/09/25 23:36:23 - mmengine - INFO - Iter(train) [1480/4603] base_lr: 1.9338e-05 lr: 1.9338e-05 eta: 1:00:56 time: 1.0176 data_time: 0.0096 memory: 16133 loss: 1.3906 tflops: 4.8727 tokens_per_sec: 80.5802 +2025/09/25 23:36:34 - mmengine - INFO - Iter(train) [1490/4603] base_lr: 1.9325e-05 lr: 1.9325e-05 eta: 1:00:43 time: 1.3244 data_time: 0.0090 memory: 16133 loss: 1.3203 tflops: 3.6982 tokens_per_sec: 61.1581 +2025/09/25 23:36:45 - mmengine - INFO - Iter(train) [1500/4603] base_lr: 1.9312e-05 lr: 1.9312e-05 eta: 1:00:29 time: 0.9959 data_time: 0.0090 memory: 16133 loss: 1.7969 tflops: 4.1887 tokens_per_sec: 69.2810 +2025/09/25 23:36:56 - mmengine - INFO - Iter(train) [1510/4603] base_lr: 1.9298e-05 lr: 1.9298e-05 eta: 1:00:15 time: 1.2883 data_time: 0.0092 memory: 16133 loss: 0.0713 tflops: 4.0838 tokens_per_sec: 67.5290 +2025/09/25 23:37:06 - mmengine - INFO - Iter(train) [1520/4603] base_lr: 1.9285e-05 lr: 1.9285e-05 eta: 1:00:02 time: 1.0169 data_time: 0.0088 memory: 16133 loss: 0.1582 tflops: 3.5670 tokens_per_sec: 59.0042 +2025/09/25 23:37:18 - mmengine - INFO - Iter(train) [1530/4603] base_lr: 1.9272e-05 lr: 1.9272e-05 eta: 0:59:49 time: 1.3006 data_time: 0.0100 memory: 16133 loss: 2.2031 tflops: 3.5334 tokens_per_sec: 58.4365 +2025/09/25 23:37:24 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 23:37:24 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:37:28 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. This pattern is characteristic of invasive ductal carcinoma. The lobules are well-defined and show a clear boundary between the tumor and the surrounding tissue.<|im_end|> + +2025/09/25 23:37:28 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:37:38 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a well-differentiated squamous cell carcinoma, characterized by the presence of keratinizing squamous cell carcinoma. The tumor is confined to the skin and does not invade deeper structures. The tumor cells exhibit a high degree of differentiation, with minimal nuclear atypia and no mitotic activity. The absence of vascular invasion and lymphovascular invasion supports the non-aggressive nature of the tumor. The absence of lymph node metastasis further confirms the localized nature of the disease. 
The tumor is well-differentiated, with a low risk of recurrence, and the absence of vascular invasion and lymphovascular invasion indicates a favorable prognosis.<|im_end|> + +2025/09/25 23:37:38 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:37:39 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +A) Poorly differentiated keratinizing squamous cell carcinoma<|im_end|> + +2025/09/25 23:38:22 - mmengine - INFO - Iter(train) [1540/4603] base_lr: 1.9258e-05 lr: 1.9258e-05 eta: 1:01:21 time: 0.9794 data_time: 0.0090 memory: 16133 loss: 0.0669 tflops: 4.9395 tokens_per_sec: 81.6857 +2025/09/25 23:38:33 - mmengine - INFO - Iter(train) [1550/4603] base_lr: 1.9245e-05 lr: 1.9245e-05 eta: 1:01:08 time: 0.9958 data_time: 0.0089 memory: 16133 loss: 0.3535 tflops: 2.6706 tokens_per_sec: 44.1860 +2025/09/25 23:38:44 - mmengine - INFO - Iter(train) [1560/4603] base_lr: 1.9231e-05 lr: 1.9231e-05 eta: 1:00:53 time: 0.9844 data_time: 0.0092 memory: 16133 loss: 1.7266 tflops: 4.6682 tokens_per_sec: 77.2035 +2025/09/25 23:38:54 - mmengine - INFO - Iter(train) [1570/4603] base_lr: 1.9217e-05 lr: 1.9217e-05 eta: 1:00:38 time: 1.3103 data_time: 0.0089 memory: 16133 loss: 1.4375 tflops: 4.0615 tokens_per_sec: 67.1597 +2025/09/25 23:39:05 - mmengine - INFO - Iter(train) [1580/4603] base_lr: 1.9203e-05 lr: 1.9203e-05 eta: 1:00:24 time: 0.9729 data_time: 0.0093 memory: 16133 loss: 0.0464 tflops: 4.2879 tokens_per_sec: 70.9218 +2025/09/25 23:39:15 - mmengine - INFO - Iter(train) [1590/4603] base_lr: 1.9189e-05 lr: 1.9189e-05 eta: 1:00:09 
time: 0.9899 data_time: 0.0088 memory: 16133 loss: 0.1182 tflops: 4.3978 tokens_per_sec: 72.7355 +2025/09/25 23:39:26 - mmengine - INFO - Iter(train) [1600/4603] base_lr: 1.9175e-05 lr: 1.9175e-05 eta: 0:59:54 time: 1.2515 data_time: 0.0089 memory: 16133 loss: 2.0312 tflops: 4.1071 tokens_per_sec: 67.9158 +2025/09/25 23:39:37 - mmengine - INFO - Iter(train) [1610/4603] base_lr: 1.9160e-05 lr: 1.9160e-05 eta: 0:59:40 time: 1.0005 data_time: 0.0091 memory: 16133 loss: 0.1226 tflops: 3.5044 tokens_per_sec: 57.9703 +2025/09/25 23:39:48 - mmengine - INFO - Iter(train) [1620/4603] base_lr: 1.9146e-05 lr: 1.9146e-05 eta: 0:59:26 time: 0.9966 data_time: 0.0090 memory: 16133 loss: 0.0737 tflops: 4.4898 tokens_per_sec: 74.2552 +2025/09/25 23:39:58 - mmengine - INFO - Iter(train) [1630/4603] base_lr: 1.9131e-05 lr: 1.9131e-05 eta: 0:59:12 time: 1.0046 data_time: 0.0092 memory: 16133 loss: 0.1191 tflops: 3.8515 tokens_per_sec: 63.7069 +2025/09/25 23:40:09 - mmengine - INFO - Iter(train) [1640/4603] base_lr: 1.9117e-05 lr: 1.9117e-05 eta: 0:58:58 time: 1.3550 data_time: 0.0088 memory: 16133 loss: 1.5469 tflops: 2.7215 tokens_per_sec: 45.0173 +2025/09/25 23:40:19 - mmengine - INFO - Iter(train) [1650/4603] base_lr: 1.9102e-05 lr: 1.9102e-05 eta: 0:58:43 time: 0.9817 data_time: 0.0091 memory: 16133 loss: 0.2393 tflops: 3.7565 tokens_per_sec: 62.1385 +2025/09/25 23:40:30 - mmengine - INFO - Iter(train) [1660/4603] base_lr: 1.9087e-05 lr: 1.9087e-05 eta: 0:58:29 time: 0.9888 data_time: 0.0087 memory: 16133 loss: 0.2070 tflops: 4.1579 tokens_per_sec: 68.7718 +2025/09/25 23:40:41 - mmengine - INFO - Iter(train) [1670/4603] base_lr: 1.9072e-05 lr: 1.9072e-05 eta: 0:58:16 time: 0.9985 data_time: 0.0090 memory: 16133 loss: 0.2031 tflops: 2.9055 tokens_per_sec: 48.0700 +2025/09/25 23:40:52 - mmengine - INFO - Iter(train) [1680/4603] base_lr: 1.9057e-05 lr: 1.9057e-05 eta: 0:58:01 time: 1.0101 data_time: 0.0090 memory: 16133 loss: 0.1621 tflops: 3.7108 tokens_per_sec: 61.3815 +2025/09/25 
23:41:02 - mmengine - INFO - Iter(train) [1690/4603] base_lr: 1.9041e-05 lr: 1.9041e-05 eta: 0:57:47 time: 1.0096 data_time: 0.0109 memory: 16133 loss: 1.5938 tflops: 6.3512 tokens_per_sec: 104.9959 +2025/09/25 23:41:13 - mmengine - INFO - Iter(train) [1700/4603] base_lr: 1.9026e-05 lr: 1.9026e-05 eta: 0:57:34 time: 1.3444 data_time: 0.0091 memory: 16133 loss: 1.7578 tflops: 3.4181 tokens_per_sec: 56.5302 +2025/09/25 23:41:24 - mmengine - INFO - Iter(train) [1710/4603] base_lr: 1.9010e-05 lr: 1.9010e-05 eta: 0:57:19 time: 1.2651 data_time: 0.0090 memory: 16133 loss: 0.2578 tflops: 1.9108 tokens_per_sec: 31.6169 +2025/09/25 23:41:34 - mmengine - INFO - Iter(train) [1720/4603] base_lr: 1.8995e-05 lr: 1.8995e-05 eta: 0:57:05 time: 0.9852 data_time: 0.0091 memory: 16133 loss: 0.1396 tflops: 4.2344 tokens_per_sec: 70.0356 +2025/09/25 23:41:45 - mmengine - INFO - Iter(train) [1730/4603] base_lr: 1.8979e-05 lr: 1.8979e-05 eta: 0:56:52 time: 0.9875 data_time: 0.0090 memory: 16132 loss: 0.1455 tflops: 3.8568 tokens_per_sec: 63.7965 +2025/09/25 23:41:56 - mmengine - INFO - Iter(train) [1740/4603] base_lr: 1.8963e-05 lr: 1.8963e-05 eta: 0:56:38 time: 0.9862 data_time: 0.0088 memory: 16133 loss: 1.7344 tflops: 5.2733 tokens_per_sec: 87.2003 +2025/09/25 23:42:06 - mmengine - INFO - Iter(train) [1750/4603] base_lr: 1.8947e-05 lr: 1.8947e-05 eta: 0:56:24 time: 1.2328 data_time: 0.0087 memory: 16133 loss: 0.1943 tflops: 2.3534 tokens_per_sec: 38.9354 +2025/09/25 23:42:17 - mmengine - INFO - Iter(train) [1760/4603] base_lr: 1.8931e-05 lr: 1.8931e-05 eta: 0:56:11 time: 1.0297 data_time: 0.0093 memory: 16133 loss: 0.1748 tflops: 4.9333 tokens_per_sec: 81.5793 +2025/09/25 23:42:28 - mmengine - INFO - Iter(train) [1770/4603] base_lr: 1.8915e-05 lr: 1.8915e-05 eta: 0:55:57 time: 1.3283 data_time: 0.0087 memory: 16133 loss: 0.1079 tflops: 3.2774 tokens_per_sec: 54.2058 +2025/09/25 23:42:39 - mmengine - INFO - Iter(train) [1780/4603] base_lr: 1.8898e-05 lr: 1.8898e-05 eta: 0:55:43 time: 
1.2347 data_time: 0.0089 memory: 16133 loss: 0.2129 tflops: 2.2028 tokens_per_sec: 36.4452 +2025/09/25 23:42:49 - mmengine - INFO - Iter(train) [1790/4603] base_lr: 1.8882e-05 lr: 1.8882e-05 eta: 0:55:29 time: 0.9832 data_time: 0.0089 memory: 16133 loss: 1.2188 tflops: 5.4742 tokens_per_sec: 90.5178 +2025/09/25 23:43:01 - mmengine - INFO - Iter(train) [1800/4603] base_lr: 1.8865e-05 lr: 1.8865e-05 eta: 0:55:16 time: 1.0185 data_time: 0.0091 memory: 16133 loss: 1.4297 tflops: 4.6310 tokens_per_sec: 76.5861 +2025/09/25 23:43:11 - mmengine - INFO - Iter(train) [1810/4603] base_lr: 1.8849e-05 lr: 1.8849e-05 eta: 0:55:03 time: 0.9700 data_time: 0.0089 memory: 16133 loss: 1.6016 tflops: 5.1742 tokens_per_sec: 85.5648 +2025/09/25 23:43:22 - mmengine - INFO - Iter(train) [1820/4603] base_lr: 1.8832e-05 lr: 1.8832e-05 eta: 0:54:49 time: 1.0016 data_time: 0.0095 memory: 16133 loss: 0.7461 tflops: 4.8298 tokens_per_sec: 79.8720 +2025/09/25 23:43:33 - mmengine - INFO - Iter(train) [1830/4603] base_lr: 1.8815e-05 lr: 1.8815e-05 eta: 0:54:36 time: 1.0017 data_time: 0.0094 memory: 16133 loss: 0.5352 tflops: 3.9230 tokens_per_sec: 64.8886 +2025/09/25 23:43:43 - mmengine - INFO - Iter(train) [1840/4603] base_lr: 1.8798e-05 lr: 1.8798e-05 eta: 0:54:22 time: 1.2904 data_time: 0.0095 memory: 16133 loss: 1.6484 tflops: 3.2329 tokens_per_sec: 53.4716 +2025/09/25 23:43:54 - mmengine - INFO - Iter(train) [1850/4603] base_lr: 1.8781e-05 lr: 1.8781e-05 eta: 0:54:09 time: 1.0125 data_time: 0.0094 memory: 16133 loss: 1.7188 tflops: 5.2561 tokens_per_sec: 86.9125 +2025/09/25 23:44:05 - mmengine - INFO - Iter(train) [1860/4603] base_lr: 1.8764e-05 lr: 1.8764e-05 eta: 0:53:56 time: 0.9922 data_time: 0.0089 memory: 16133 loss: 1.3516 tflops: 7.0121 tokens_per_sec: 115.9072 +2025/09/25 23:44:16 - mmengine - INFO - Iter(train) [1870/4603] base_lr: 1.8746e-05 lr: 1.8746e-05 eta: 0:53:42 time: 1.0279 data_time: 0.0082 memory: 16133 loss: 0.1807 tflops: 3.1756 tokens_per_sec: 52.5336 +2025/09/25 
23:44:27 - mmengine - INFO - Iter(train) [1880/4603] base_lr: 1.8729e-05 lr: 1.8729e-05 eta: 0:53:30 time: 0.9852 data_time: 0.0087 memory: 16133 loss: 0.1719 tflops: 3.0062 tokens_per_sec: 49.7354 +2025/09/25 23:44:38 - mmengine - INFO - Iter(train) [1890/4603] base_lr: 1.8711e-05 lr: 1.8711e-05 eta: 0:53:16 time: 1.0064 data_time: 0.0089 memory: 16133 loss: 0.1250 tflops: 4.1452 tokens_per_sec: 68.5607 +2025/09/25 23:44:49 - mmengine - INFO - Iter(train) [1900/4603] base_lr: 1.8694e-05 lr: 1.8694e-05 eta: 0:53:03 time: 1.3082 data_time: 0.0094 memory: 16133 loss: 0.1328 tflops: 3.4664 tokens_per_sec: 57.3285 +2025/09/25 23:44:59 - mmengine - INFO - Iter(train) [1910/4603] base_lr: 1.8676e-05 lr: 1.8676e-05 eta: 0:52:50 time: 0.9850 data_time: 0.0095 memory: 16133 loss: 1.1797 tflops: 5.3416 tokens_per_sec: 88.3276 +2025/09/25 23:45:10 - mmengine - INFO - Iter(train) [1920/4603] base_lr: 1.8658e-05 lr: 1.8658e-05 eta: 0:52:36 time: 1.2618 data_time: 0.0097 memory: 16133 loss: 0.1904 tflops: 2.5869 tokens_per_sec: 42.7947 +2025/09/25 23:45:21 - mmengine - INFO - Iter(train) [1930/4603] base_lr: 1.8640e-05 lr: 1.8640e-05 eta: 0:52:23 time: 0.9910 data_time: 0.0089 memory: 16133 loss: 0.1992 tflops: 3.9042 tokens_per_sec: 64.5786 +2025/09/25 23:45:31 - mmengine - INFO - Iter(train) [1940/4603] base_lr: 1.8622e-05 lr: 1.8622e-05 eta: 0:52:10 time: 0.9984 data_time: 0.0089 memory: 16133 loss: 1.1641 tflops: 3.6331 tokens_per_sec: 60.0979 +2025/09/25 23:45:42 - mmengine - INFO - Iter(train) [1950/4603] base_lr: 1.8603e-05 lr: 1.8603e-05 eta: 0:51:57 time: 1.0255 data_time: 0.0087 memory: 16133 loss: 1.3672 tflops: 4.8945 tokens_per_sec: 80.9393 +2025/09/25 23:45:53 - mmengine - INFO - Iter(train) [1960/4603] base_lr: 1.8585e-05 lr: 1.8585e-05 eta: 0:51:44 time: 1.0016 data_time: 0.0097 memory: 16133 loss: 1.4297 tflops: 5.0110 tokens_per_sec: 82.8655 +2025/09/25 23:46:04 - mmengine - INFO - Iter(train) [1970/4603] base_lr: 1.8567e-05 lr: 1.8567e-05 eta: 0:51:31 time: 
1.0209 data_time: 0.0098 memory: 16133 loss: 0.0854 tflops: 4.0863 tokens_per_sec: 67.5867 +2025/09/25 23:46:14 - mmengine - INFO - Iter(train) [1980/4603] base_lr: 1.8548e-05 lr: 1.8548e-05 eta: 0:51:18 time: 0.9981 data_time: 0.0101 memory: 16133 loss: 1.8594 tflops: 3.8765 tokens_per_sec: 64.1200 +2025/09/25 23:46:25 - mmengine - INFO - Iter(train) [1990/4603] base_lr: 1.8529e-05 lr: 1.8529e-05 eta: 0:51:05 time: 1.3069 data_time: 0.0092 memory: 16133 loss: 1.3125 tflops: 3.2847 tokens_per_sec: 54.3265 +2025/09/25 23:46:36 - mmengine - INFO - Exp name: temp_config_stage_2b_20250925_230352 +2025/09/25 23:46:36 - mmengine - INFO - Iter(train) [2000/4603] base_lr: 1.8511e-05 lr: 1.8511e-05 eta: 0:50:52 time: 1.0018 data_time: 0.0094 memory: 16133 loss: 0.1406 tflops: 8.0942 tokens_per_sec: 133.7616 +2025/09/25 23:46:47 - mmengine - INFO - Iter(train) [2010/4603] base_lr: 1.8492e-05 lr: 1.8492e-05 eta: 0:50:39 time: 1.0126 data_time: 0.0088 memory: 16133 loss: 0.1162 tflops: 4.0602 tokens_per_sec: 67.1563 +2025/09/25 23:46:58 - mmengine - INFO - Iter(train) [2020/4603] base_lr: 1.8473e-05 lr: 1.8473e-05 eta: 0:50:27 time: 0.9832 data_time: 0.0096 memory: 16133 loss: 1.8047 tflops: 4.7970 tokens_per_sec: 79.3322 +2025/09/25 23:47:09 - mmengine - INFO - Iter(train) [2030/4603] base_lr: 1.8453e-05 lr: 1.8453e-05 eta: 0:50:14 time: 1.2932 data_time: 0.0088 memory: 16133 loss: 1.5938 tflops: 3.0387 tokens_per_sec: 50.2629 +2025/09/25 23:47:20 - mmengine - INFO - Iter(train) [2040/4603] base_lr: 1.8434e-05 lr: 1.8434e-05 eta: 0:50:01 time: 1.3602 data_time: 0.0091 memory: 16133 loss: 1.6719 tflops: 4.4469 tokens_per_sec: 73.5203 +2025/09/25 23:47:29 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/25 23:47:29 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:47:31 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern, forming distinct lobules. This pattern is characteristic of invasive ductal carcinoma.<|im_end|> + +2025/09/25 23:47:31 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:47:39 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant nuclear pleomorphism and high mitotic activity, consistent with a high-grade malignancy. The tumor exhibits extensive necrosis and vascular invasion, suggesting aggressive behavior. The presence of lymphovascular invasion and perineural invasion further supports the aggressive nature of the tumor. The absence of specific molecular markers indicates a lack of targeted therapy options. The tumor is poorly differentiated, indicating a high-grade malignancy with poor prognosis.<|im_end|> + +2025/09/25 23:47:39 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:47:40 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/25 23:47:48 - mmengine - INFO - Iter(train) [2050/4603] base_lr: 1.8415e-05 lr: 1.8415e-05 eta: 0:50:10 time: 1.2489 data_time: 0.0091 memory: 16133 loss: 0.3652 tflops: 3.7764 tokens_per_sec: 62.4536 +2025/09/25 23:47:58 - mmengine - INFO - Iter(train) [2060/4603] base_lr: 1.8395e-05 lr: 1.8395e-05 eta: 0:49:57 time: 0.9894 data_time: 0.0093 memory: 16133 loss: 0.1045 tflops: 4.7059 tokens_per_sec: 77.8264 +2025/09/25 23:48:09 - mmengine - INFO - Iter(train) [2070/4603] base_lr: 1.8376e-05 lr: 1.8376e-05 eta: 0:49:44 time: 0.9996 data_time: 0.0090 memory: 16133 loss: 0.0952 tflops: 4.1735 tokens_per_sec: 69.0289 +2025/09/25 23:48:20 - mmengine - INFO - Iter(train) [2080/4603] base_lr: 1.8356e-05 lr: 1.8356e-05 eta: 0:49:31 time: 0.9765 data_time: 0.0083 memory: 16133 loss: 0.3594 tflops: 2.9093 tokens_per_sec: 48.1332 +2025/09/25 23:48:31 - mmengine - INFO - Iter(train) [2090/4603] base_lr: 1.8336e-05 lr: 1.8336e-05 eta: 0:49:18 time: 1.0164 data_time: 0.0091 memory: 16133 loss: 1.3828 tflops: 4.2829 tokens_per_sec: 70.8350 +2025/09/25 23:48:41 - mmengine - INFO - Iter(train) [2100/4603] base_lr: 1.8316e-05 lr: 1.8316e-05 eta: 0:49:05 time: 1.3516 data_time: 0.0092 memory: 16133 loss: 1.0391 tflops: 2.9521 tokens_per_sec: 48.8295 +2025/09/25 23:48:52 - mmengine - INFO - Iter(train) [2110/4603] base_lr: 1.8296e-05 lr: 1.8296e-05 eta: 0:48:52 time: 1.0042 data_time: 0.0098 memory: 16133 loss: 0.2109 tflops: 2.8890 tokens_per_sec: 47.7969 +2025/09/25 23:49:03 - mmengine - INFO - Iter(train) [2120/4603] base_lr: 1.8276e-05 lr: 1.8276e-05 eta: 0:48:39 time: 0.9867 data_time: 0.0089 memory: 16133 loss: 1.5000 tflops: 5.1479 tokens_per_sec: 85.1280 +2025/09/25 23:49:14 - mmengine - INFO - 
Iter(train) [2130/4603] base_lr: 1.8256e-05 lr: 1.8256e-05 eta: 0:48:26 time: 1.3017 data_time: 0.0088 memory: 16133 loss: 1.0312 tflops: 3.9953 tokens_per_sec: 66.0663 +2025/09/25 23:49:25 - mmengine - INFO - Iter(train) [2140/4603] base_lr: 1.8236e-05 lr: 1.8236e-05 eta: 0:48:14 time: 1.0424 data_time: 0.0092 memory: 16133 loss: 0.0203 tflops: 4.6986 tokens_per_sec: 77.7019 +2025/09/25 23:49:36 - mmengine - INFO - Iter(train) [2150/4603] base_lr: 1.8215e-05 lr: 1.8215e-05 eta: 0:48:01 time: 1.0113 data_time: 0.0088 memory: 16133 loss: 1.2734 tflops: 4.4844 tokens_per_sec: 74.1653 +2025/09/25 23:49:47 - mmengine - INFO - Iter(train) [2160/4603] base_lr: 1.8195e-05 lr: 1.8195e-05 eta: 0:47:48 time: 1.3205 data_time: 0.0101 memory: 16133 loss: 1.5000 tflops: 3.3884 tokens_per_sec: 56.0393 +2025/09/25 23:49:58 - mmengine - INFO - Iter(train) [2170/4603] base_lr: 1.8174e-05 lr: 1.8174e-05 eta: 0:47:35 time: 0.9800 data_time: 0.0078 memory: 16133 loss: 0.0053 tflops: 3.8247 tokens_per_sec: 63.2650 +2025/09/25 23:50:09 - mmengine - INFO - Iter(train) [2180/4603] base_lr: 1.8153e-05 lr: 1.8153e-05 eta: 0:47:23 time: 1.0289 data_time: 0.0091 memory: 16133 loss: 0.4980 tflops: 2.8199 tokens_per_sec: 46.6533 +2025/09/25 23:50:19 - mmengine - INFO - Iter(train) [2190/4603] base_lr: 1.8133e-05 lr: 1.8133e-05 eta: 0:47:10 time: 1.2224 data_time: 0.0091 memory: 16133 loss: 0.0051 tflops: 3.4127 tokens_per_sec: 56.4446 +2025/09/25 23:50:30 - mmengine - INFO - Iter(train) [2200/4603] base_lr: 1.8112e-05 lr: 1.8112e-05 eta: 0:46:57 time: 1.0142 data_time: 0.0088 memory: 16133 loss: 0.4766 tflops: 3.2185 tokens_per_sec: 53.2432 +2025/09/25 23:50:41 - mmengine - INFO - Iter(train) [2210/4603] base_lr: 1.8091e-05 lr: 1.8091e-05 eta: 0:46:44 time: 1.0103 data_time: 0.0094 memory: 16133 loss: 1.5000 tflops: 5.3278 tokens_per_sec: 88.0968 +2025/09/25 23:50:51 - mmengine - INFO - Iter(train) [2220/4603] base_lr: 1.8069e-05 lr: 1.8069e-05 eta: 0:46:31 time: 1.0004 data_time: 0.0091 
memory: 16133 loss: 1.1562 tflops: 4.4121 tokens_per_sec: 72.9715 +2025/09/25 23:51:02 - mmengine - INFO - Iter(train) [2230/4603] base_lr: 1.8048e-05 lr: 1.8048e-05 eta: 0:46:18 time: 1.0046 data_time: 0.0093 memory: 16133 loss: 0.0017 tflops: 5.0562 tokens_per_sec: 83.6126 +2025/09/25 23:51:13 - mmengine - INFO - Iter(train) [2240/4603] base_lr: 1.8027e-05 lr: 1.8027e-05 eta: 0:46:06 time: 1.2655 data_time: 0.0095 memory: 16133 loss: 0.0564 tflops: 3.3921 tokens_per_sec: 56.1036 +2025/09/25 23:51:24 - mmengine - INFO - Iter(train) [2250/4603] base_lr: 1.8005e-05 lr: 1.8005e-05 eta: 0:45:53 time: 1.3103 data_time: 0.0092 memory: 16133 loss: 0.8594 tflops: 3.0453 tokens_per_sec: 50.3706 +2025/09/25 23:51:34 - mmengine - INFO - Iter(train) [2260/4603] base_lr: 1.7984e-05 lr: 1.7984e-05 eta: 0:45:40 time: 1.0019 data_time: 0.0090 memory: 16132 loss: 0.2637 tflops: 3.1373 tokens_per_sec: 51.9026 +2025/09/25 23:51:45 - mmengine - INFO - Iter(train) [2270/4603] base_lr: 1.7962e-05 lr: 1.7962e-05 eta: 0:45:28 time: 1.0119 data_time: 0.0091 memory: 16133 loss: 0.0069 tflops: 3.9432 tokens_per_sec: 65.2231 +2025/09/25 23:51:56 - mmengine - INFO - Iter(train) [2280/4603] base_lr: 1.7940e-05 lr: 1.7940e-05 eta: 0:45:15 time: 1.0082 data_time: 0.0092 memory: 16133 loss: 0.1475 tflops: 5.0384 tokens_per_sec: 83.3181 +2025/09/25 23:52:06 - mmengine - INFO - Iter(train) [2290/4603] base_lr: 1.7918e-05 lr: 1.7918e-05 eta: 0:45:02 time: 0.9813 data_time: 0.0086 memory: 16133 loss: 0.2637 tflops: 2.7716 tokens_per_sec: 45.8565 +2025/09/25 23:52:17 - mmengine - INFO - Iter(train) [2300/4603] base_lr: 1.7896e-05 lr: 1.7896e-05 eta: 0:44:50 time: 1.0151 data_time: 0.0086 memory: 16133 loss: 0.1758 tflops: 2.9178 tokens_per_sec: 48.2728 +2025/09/25 23:52:28 - mmengine - INFO - Iter(train) [2310/4603] base_lr: 1.7874e-05 lr: 1.7874e-05 eta: 0:44:37 time: 1.0141 data_time: 0.0090 memory: 16133 loss: 0.1240 tflops: 3.1591 tokens_per_sec: 52.2623 +2025/09/25 23:52:39 - mmengine - INFO - 
Iter(train) [2320/4603] base_lr: 1.7852e-05 lr: 1.7852e-05 eta: 0:44:25 time: 1.0096 data_time: 0.0087 memory: 16133 loss: 0.2891 tflops: 2.8736 tokens_per_sec: 47.5417 +2025/09/25 23:52:50 - mmengine - INFO - Iter(train) [2330/4603] base_lr: 1.7830e-05 lr: 1.7830e-05 eta: 0:44:12 time: 0.9973 data_time: 0.0090 memory: 16133 loss: 0.2373 tflops: 2.9698 tokens_per_sec: 49.1337 +2025/09/25 23:53:01 - mmengine - INFO - Iter(train) [2340/4603] base_lr: 1.7807e-05 lr: 1.7807e-05 eta: 0:44:00 time: 0.9896 data_time: 0.0090 memory: 16133 loss: 1.4297 tflops: 4.7658 tokens_per_sec: 78.8168 +2025/09/25 23:53:11 - mmengine - INFO - Iter(train) [2350/4603] base_lr: 1.7785e-05 lr: 1.7785e-05 eta: 0:43:47 time: 1.0157 data_time: 0.0089 memory: 16133 loss: 0.5195 tflops: 2.9161 tokens_per_sec: 48.2445 +2025/09/25 23:53:22 - mmengine - INFO - Iter(train) [2360/4603] base_lr: 1.7762e-05 lr: 1.7762e-05 eta: 0:43:34 time: 0.9981 data_time: 0.0094 memory: 16133 loss: 0.0767 tflops: 2.7857 tokens_per_sec: 46.0895 +2025/09/25 23:53:33 - mmengine - INFO - Iter(train) [2370/4603] base_lr: 1.7740e-05 lr: 1.7740e-05 eta: 0:43:23 time: 0.9995 data_time: 0.0094 memory: 16133 loss: 0.2754 tflops: 3.6897 tokens_per_sec: 61.0329 +2025/09/25 23:53:44 - mmengine - INFO - Iter(train) [2380/4603] base_lr: 1.7717e-05 lr: 1.7717e-05 eta: 0:43:10 time: 1.0271 data_time: 0.0091 memory: 16133 loss: 0.0047 tflops: 3.7670 tokens_per_sec: 62.3089 +2025/09/25 23:53:54 - mmengine - INFO - Iter(train) [2390/4603] base_lr: 1.7694e-05 lr: 1.7694e-05 eta: 0:42:57 time: 1.0134 data_time: 0.0092 memory: 16133 loss: 0.0042 tflops: 4.1164 tokens_per_sec: 68.0848 +2025/09/25 23:54:05 - mmengine - INFO - Iter(train) [2400/4603] base_lr: 1.7671e-05 lr: 1.7671e-05 eta: 0:42:44 time: 1.0119 data_time: 0.0093 memory: 16133 loss: 0.0356 tflops: 4.4813 tokens_per_sec: 74.1146 +2025/09/25 23:54:16 - mmengine - INFO - Iter(train) [2410/4603] base_lr: 1.7648e-05 lr: 1.7648e-05 eta: 0:42:32 time: 0.9959 data_time: 0.0090 
memory: 16133 loss: 0.1865 tflops: 3.9459 tokens_per_sec: 65.2674 +2025/09/25 23:54:26 - mmengine - INFO - Iter(train) [2420/4603] base_lr: 1.7625e-05 lr: 1.7625e-05 eta: 0:42:19 time: 1.0011 data_time: 0.0088 memory: 16133 loss: 1.5078 tflops: 4.5905 tokens_per_sec: 75.9191 +2025/09/25 23:54:37 - mmengine - INFO - Iter(train) [2430/4603] base_lr: 1.7601e-05 lr: 1.7601e-05 eta: 0:42:07 time: 0.9801 data_time: 0.0089 memory: 16133 loss: 0.2363 tflops: 4.9977 tokens_per_sec: 82.6472 +2025/09/25 23:54:48 - mmengine - INFO - Iter(train) [2440/4603] base_lr: 1.7578e-05 lr: 1.7578e-05 eta: 0:41:55 time: 1.0141 data_time: 0.0090 memory: 16133 loss: 0.6602 tflops: 7.5181 tokens_per_sec: 124.2538 +2025/09/25 23:54:58 - mmengine - INFO - Iter(train) [2450/4603] base_lr: 1.7555e-05 lr: 1.7555e-05 eta: 0:41:42 time: 1.2994 data_time: 0.0088 memory: 16133 loss: 0.9336 tflops: 3.0242 tokens_per_sec: 50.0219 +2025/09/25 23:55:09 - mmengine - INFO - Iter(train) [2460/4603] base_lr: 1.7531e-05 lr: 1.7531e-05 eta: 0:41:30 time: 0.9831 data_time: 0.0087 memory: 16133 loss: 1.5938 tflops: 5.7213 tokens_per_sec: 94.5987 +2025/09/25 23:55:20 - mmengine - INFO - Iter(train) [2470/4603] base_lr: 1.7507e-05 lr: 1.7507e-05 eta: 0:41:18 time: 1.0070 data_time: 0.0097 memory: 16133 loss: 0.0024 tflops: 4.9241 tokens_per_sec: 81.4298 +2025/09/25 23:55:31 - mmengine - INFO - Iter(train) [2480/4603] base_lr: 1.7483e-05 lr: 1.7483e-05 eta: 0:41:05 time: 0.9705 data_time: 0.0091 memory: 16133 loss: 1.0078 tflops: 4.4233 tokens_per_sec: 73.1584 +2025/09/25 23:55:42 - mmengine - INFO - Iter(train) [2490/4603] base_lr: 1.7460e-05 lr: 1.7460e-05 eta: 0:40:53 time: 1.2873 data_time: 0.0087 memory: 16133 loss: 0.3164 tflops: 2.2538 tokens_per_sec: 37.2873 +2025/09/25 23:55:52 - mmengine - INFO - Iter(train) [2500/4603] base_lr: 1.7436e-05 lr: 1.7436e-05 eta: 0:40:41 time: 0.9977 data_time: 0.0093 memory: 16133 loss: 0.1260 tflops: 3.2718 tokens_per_sec: 54.1259 +2025/09/25 23:56:03 - mmengine - INFO - 
Iter(train) [2510/4603] base_lr: 1.7412e-05 lr: 1.7412e-05 eta: 0:40:28 time: 1.0114 data_time: 0.0093 memory: 16133 loss: 1.5703 tflops: 5.5012 tokens_per_sec: 90.9603 +2025/09/25 23:56:14 - mmengine - INFO - Iter(train) [2520/4603] base_lr: 1.7387e-05 lr: 1.7387e-05 eta: 0:40:16 time: 0.9922 data_time: 0.0088 memory: 16133 loss: 1.1406 tflops: 4.9368 tokens_per_sec: 81.6407 +2025/09/25 23:56:24 - mmengine - INFO - Iter(train) [2530/4603] base_lr: 1.7363e-05 lr: 1.7363e-05 eta: 0:40:03 time: 1.0279 data_time: 0.0088 memory: 16133 loss: 0.0020 tflops: 3.8230 tokens_per_sec: 63.2354 +2025/09/25 23:56:35 - mmengine - INFO - Iter(train) [2540/4603] base_lr: 1.7339e-05 lr: 1.7339e-05 eta: 0:39:51 time: 1.3032 data_time: 0.0092 memory: 16133 loss: 0.0510 tflops: 2.4119 tokens_per_sec: 39.9015 +2025/09/25 23:56:46 - mmengine - INFO - Iter(train) [2550/4603] base_lr: 1.7314e-05 lr: 1.7314e-05 eta: 0:39:39 time: 1.0079 data_time: 0.0088 memory: 16133 loss: 0.2715 tflops: 3.2387 tokens_per_sec: 53.5787 +2025/09/25 23:56:57 - mmengine - INFO - Iter(train) [2560/4603] base_lr: 1.7290e-05 lr: 1.7290e-05 eta: 0:39:27 time: 1.3404 data_time: 0.0088 memory: 16133 loss: 0.0840 tflops: 1.9841 tokens_per_sec: 32.8269 +2025/09/25 23:56:57 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/25 23:56:57 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:57:00 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern, which is characteristic of invasive ductal carcinoma. 
This pattern is seen throughout the slide, indicating a well-defined and organized tumor structure.<|im_end|> + +2025/09/25 23:57:00 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:57:07 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant invasion into the surrounding tissues. The tumor exhibits aggressive growth patterns, including vascular and lymphatic invasion, and is associated with lymph node metastasis. The presence of poorly differentiated features suggests a high-grade malignancy. The tumor is infiltrative and extends beyond the glandular structures, indicating a high risk of local recurrence and distant metastasis.<|im_end|> + +2025/09/25 23:57:07 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/25 23:57:08 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/25 23:57:26 - mmengine - INFO - Iter(train) [2570/4603] base_lr: 1.7265e-05 lr: 1.7265e-05 eta: 0:39:29 time: 1.4133 data_time: 0.0088 memory: 16133 loss: 0.8828 tflops: 2.7805 tokens_per_sec: 45.9911 +2025/09/25 23:57:37 - mmengine - INFO - Iter(train) [2580/4603] base_lr: 1.7240e-05 lr: 1.7240e-05 eta: 0:39:17 time: 0.9931 data_time: 0.0088 memory: 16133 loss: 1.3906 tflops: 7.1883 tokens_per_sec: 118.8158 +2025/09/25 23:57:47 - mmengine - INFO - Iter(train) [2590/4603] base_lr: 1.7216e-05 lr: 1.7216e-05 eta: 0:39:04 time: 0.9789 data_time: 0.0087 memory: 16133 loss: 0.0737 tflops: 5.1273 tokens_per_sec: 84.7886 +2025/09/25 23:57:58 - mmengine - INFO - Iter(train) [2600/4603] base_lr: 1.7191e-05 lr: 1.7191e-05 eta: 0:38:52 time: 0.9937 data_time: 0.0096 memory: 16133 loss: 1.3281 tflops: 5.4774 tokens_per_sec: 90.5698 +2025/09/25 23:58:08 - mmengine - INFO - Iter(train) [2610/4603] base_lr: 1.7166e-05 lr: 1.7166e-05 eta: 0:38:39 time: 0.9901 data_time: 0.0086 memory: 16133 loss: 0.1924 tflops: 3.3580 tokens_per_sec: 55.5517 +2025/09/25 23:58:19 - mmengine - INFO - Iter(train) [2620/4603] base_lr: 1.7141e-05 lr: 1.7141e-05 eta: 0:38:26 time: 0.9990 data_time: 0.0092 memory: 16133 loss: 1.4766 tflops: 5.2664 tokens_per_sec: 87.0850 +2025/09/25 23:58:30 - mmengine - INFO - Iter(train) [2630/4603] base_lr: 1.7115e-05 lr: 1.7115e-05 eta: 0:38:15 time: 0.9889 data_time: 0.0089 memory: 16133 loss: 1.0078 tflops: 6.9129 tokens_per_sec: 114.2713 +2025/09/25 23:58:41 - mmengine - INFO - Iter(train) [2640/4603] base_lr: 1.7090e-05 lr: 1.7090e-05 eta: 0:38:02 time: 0.9762 data_time: 0.0088 memory: 16133 loss: 0.9414 tflops: 4.5215 tokens_per_sec: 74.7801 +2025/09/25 23:58:51 - mmengine - INFO - 
Iter(train) [2650/4603] base_lr: 1.7065e-05 lr: 1.7065e-05 eta: 0:37:50 time: 0.9742 data_time: 0.0087 memory: 16133 loss: 1.5078 tflops: 4.5927 tokens_per_sec: 75.9573 +2025/09/25 23:59:02 - mmengine - INFO - Iter(train) [2660/4603] base_lr: 1.7039e-05 lr: 1.7039e-05 eta: 0:37:38 time: 0.9975 data_time: 0.0093 memory: 16133 loss: 0.2734 tflops: 2.9086 tokens_per_sec: 48.1213 +2025/09/25 23:59:13 - mmengine - INFO - Iter(train) [2670/4603] base_lr: 1.7014e-05 lr: 1.7014e-05 eta: 0:37:25 time: 0.9842 data_time: 0.0089 memory: 16133 loss: 0.1074 tflops: 4.0545 tokens_per_sec: 67.0626 +2025/09/25 23:59:23 - mmengine - INFO - Iter(train) [2680/4603] base_lr: 1.6988e-05 lr: 1.6988e-05 eta: 0:37:13 time: 0.9927 data_time: 0.0088 memory: 16133 loss: 1.0547 tflops: 5.2998 tokens_per_sec: 87.6374 +2025/09/25 23:59:34 - mmengine - INFO - Iter(train) [2690/4603] base_lr: 1.6962e-05 lr: 1.6962e-05 eta: 0:37:01 time: 0.9670 data_time: 0.0090 memory: 16133 loss: 0.0150 tflops: 4.1891 tokens_per_sec: 69.2887 +2025/09/25 23:59:45 - mmengine - INFO - Iter(train) [2700/4603] base_lr: 1.6936e-05 lr: 1.6936e-05 eta: 0:36:48 time: 1.0181 data_time: 0.0089 memory: 16133 loss: 0.2598 tflops: 2.8498 tokens_per_sec: 47.1476 +2025/09/25 23:59:55 - mmengine - INFO - Iter(train) [2710/4603] base_lr: 1.6911e-05 lr: 1.6911e-05 eta: 0:36:36 time: 1.0247 data_time: 0.0094 memory: 16133 loss: 0.0138 tflops: 4.3075 tokens_per_sec: 71.2421 +2025/09/26 00:00:06 - mmengine - INFO - Iter(train) [2720/4603] base_lr: 1.6885e-05 lr: 1.6885e-05 eta: 0:36:24 time: 0.9940 data_time: 0.0088 memory: 16133 loss: 0.2559 tflops: 2.9187 tokens_per_sec: 48.2877 +2025/09/26 00:00:17 - mmengine - INFO - Iter(train) [2730/4603] base_lr: 1.6858e-05 lr: 1.6858e-05 eta: 0:36:12 time: 1.0000 data_time: 0.0089 memory: 16133 loss: 1.1172 tflops: 4.3534 tokens_per_sec: 72.0016 +2025/09/26 00:00:27 - mmengine - INFO - Iter(train) [2740/4603] base_lr: 1.6832e-05 lr: 1.6832e-05 eta: 0:35:59 time: 0.9707 data_time: 0.0087 
memory: 16133 loss: 0.1001 tflops: 3.7368 tokens_per_sec: 61.8130 +2025/09/26 00:00:38 - mmengine - INFO - Iter(train) [2750/4603] base_lr: 1.6806e-05 lr: 1.6806e-05 eta: 0:35:47 time: 1.3318 data_time: 0.0088 memory: 16133 loss: 0.0070 tflops: 3.6776 tokens_per_sec: 60.8177 +2025/09/26 00:00:49 - mmengine - INFO - Iter(train) [2760/4603] base_lr: 1.6780e-05 lr: 1.6780e-05 eta: 0:35:35 time: 0.9679 data_time: 0.0086 memory: 16133 loss: 0.2432 tflops: 2.9975 tokens_per_sec: 49.5918 +2025/09/26 00:00:59 - mmengine - INFO - Iter(train) [2770/4603] base_lr: 1.6753e-05 lr: 1.6753e-05 eta: 0:35:23 time: 0.9932 data_time: 0.0092 memory: 16133 loss: 1.4453 tflops: 4.1394 tokens_per_sec: 68.4664 +2025/09/26 00:01:10 - mmengine - INFO - Iter(train) [2780/4603] base_lr: 1.6727e-05 lr: 1.6727e-05 eta: 0:35:11 time: 0.9902 data_time: 0.0144 memory: 16133 loss: 1.7812 tflops: 4.9463 tokens_per_sec: 81.7980 +2025/09/26 00:01:21 - mmengine - INFO - Iter(train) [2790/4603] base_lr: 1.6700e-05 lr: 1.6700e-05 eta: 0:34:58 time: 1.0085 data_time: 0.0097 memory: 16133 loss: 0.0586 tflops: 4.0766 tokens_per_sec: 67.4274 +2025/09/26 00:01:31 - mmengine - INFO - Iter(train) [2800/4603] base_lr: 1.6673e-05 lr: 1.6673e-05 eta: 0:34:46 time: 1.0149 data_time: 0.0088 memory: 16133 loss: 0.0011 tflops: 4.4682 tokens_per_sec: 73.8967 +2025/09/26 00:01:42 - mmengine - INFO - Iter(train) [2810/4603] base_lr: 1.6647e-05 lr: 1.6647e-05 eta: 0:34:34 time: 1.0121 data_time: 0.0102 memory: 16133 loss: 1.2500 tflops: 5.7370 tokens_per_sec: 94.8541 +2025/09/26 00:01:53 - mmengine - INFO - Iter(train) [2820/4603] base_lr: 1.6620e-05 lr: 1.6620e-05 eta: 0:34:22 time: 0.9998 data_time: 0.0090 memory: 16133 loss: 1.5703 tflops: 4.5356 tokens_per_sec: 75.0122 +2025/09/26 00:02:04 - mmengine - INFO - Iter(train) [2830/4603] base_lr: 1.6593e-05 lr: 1.6593e-05 eta: 0:34:10 time: 1.0280 data_time: 0.0087 memory: 16133 loss: 0.0464 tflops: 4.0582 tokens_per_sec: 67.1221 +2025/09/26 00:02:15 - mmengine - INFO - 
Iter(train) [2840/4603] base_lr: 1.6566e-05 lr: 1.6566e-05 eta: 0:33:58 time: 0.9980 data_time: 0.0085 memory: 16133 loss: 1.5781 tflops: 6.6675 tokens_per_sec: 110.2184 +2025/09/26 00:02:26 - mmengine - INFO - Iter(train) [2850/4603] base_lr: 1.6538e-05 lr: 1.6538e-05 eta: 0:33:46 time: 1.2880 data_time: 0.0091 memory: 16133 loss: 0.0942 tflops: 3.6148 tokens_per_sec: 59.7815 +2025/09/26 00:02:36 - mmengine - INFO - Iter(train) [2860/4603] base_lr: 1.6511e-05 lr: 1.6511e-05 eta: 0:33:34 time: 0.9716 data_time: 0.0093 memory: 16133 loss: 0.1943 tflops: 2.7370 tokens_per_sec: 45.2850 +2025/09/26 00:02:47 - mmengine - INFO - Iter(train) [2870/4603] base_lr: 1.6484e-05 lr: 1.6484e-05 eta: 0:33:22 time: 0.9972 data_time: 0.0090 memory: 16133 loss: 0.3672 tflops: 3.0307 tokens_per_sec: 50.1393 +2025/09/26 00:02:58 - mmengine - INFO - Iter(train) [2880/4603] base_lr: 1.6457e-05 lr: 1.6457e-05 eta: 0:33:10 time: 1.2781 data_time: 0.0090 memory: 16133 loss: 1.2891 tflops: 3.8323 tokens_per_sec: 63.3750 +2025/09/26 00:03:09 - mmengine - INFO - Iter(train) [2890/4603] base_lr: 1.6429e-05 lr: 1.6429e-05 eta: 0:32:58 time: 0.9786 data_time: 0.0088 memory: 16133 loss: 1.7031 tflops: 3.8301 tokens_per_sec: 63.3549 +2025/09/26 00:03:19 - mmengine - INFO - Iter(train) [2900/4603] base_lr: 1.6402e-05 lr: 1.6402e-05 eta: 0:32:46 time: 0.9785 data_time: 0.0088 memory: 16133 loss: 1.1484 tflops: 5.1296 tokens_per_sec: 84.8260 +2025/09/26 00:03:30 - mmengine - INFO - Iter(train) [2910/4603] base_lr: 1.6374e-05 lr: 1.6374e-05 eta: 0:32:34 time: 0.9933 data_time: 0.0087 memory: 16133 loss: 1.2344 tflops: 5.1138 tokens_per_sec: 84.5650 +2025/09/26 00:03:41 - mmengine - INFO - Iter(train) [2920/4603] base_lr: 1.6346e-05 lr: 1.6346e-05 eta: 0:32:22 time: 1.3292 data_time: 0.0089 memory: 16133 loss: 0.3809 tflops: 3.8215 tokens_per_sec: 63.1944 +2025/09/26 00:03:51 - mmengine - INFO - Iter(train) [2930/4603] base_lr: 1.6318e-05 lr: 1.6318e-05 eta: 0:32:10 time: 0.9708 data_time: 0.0091 
memory: 16133 loss: 0.2871 tflops: 5.1076 tokens_per_sec: 84.4640 +2025/09/26 00:04:02 - mmengine - INFO - Iter(train) [2940/4603] base_lr: 1.6291e-05 lr: 1.6291e-05 eta: 0:31:58 time: 1.2385 data_time: 0.0085 memory: 16133 loss: 1.4531 tflops: 4.2482 tokens_per_sec: 70.2484 +2025/09/26 00:04:13 - mmengine - INFO - Iter(train) [2950/4603] base_lr: 1.6263e-05 lr: 1.6263e-05 eta: 0:31:46 time: 1.3178 data_time: 0.0089 memory: 16133 loss: 1.0078 tflops: 3.4413 tokens_per_sec: 56.9138 +2025/09/26 00:04:23 - mmengine - INFO - Iter(train) [2960/4603] base_lr: 1.6234e-05 lr: 1.6234e-05 eta: 0:31:34 time: 1.0206 data_time: 0.0095 memory: 16133 loss: 1.5703 tflops: 4.5028 tokens_per_sec: 74.4693 +2025/09/26 00:04:34 - mmengine - INFO - Iter(train) [2970/4603] base_lr: 1.6206e-05 lr: 1.6206e-05 eta: 0:31:22 time: 0.9787 data_time: 0.0087 memory: 16133 loss: 0.0039 tflops: 5.2520 tokens_per_sec: 86.8493 +2025/09/26 00:04:45 - mmengine - INFO - Iter(train) [2980/4603] base_lr: 1.6178e-05 lr: 1.6178e-05 eta: 0:31:10 time: 0.9917 data_time: 0.0089 memory: 16133 loss: 0.0057 tflops: 4.8172 tokens_per_sec: 79.6646 +2025/09/26 00:04:56 - mmengine - INFO - Iter(train) [2990/4603] base_lr: 1.6150e-05 lr: 1.6150e-05 eta: 0:30:58 time: 1.3574 data_time: 0.0091 memory: 16133 loss: 1.2734 tflops: 4.4112 tokens_per_sec: 72.9315 +2025/09/26 00:05:07 - mmengine - INFO - Exp name: temp_config_stage_2b_20250925_230352 +2025/09/26 00:05:07 - mmengine - INFO - Iter(train) [3000/4603] base_lr: 1.6122e-05 lr: 1.6122e-05 eta: 0:30:46 time: 1.0063 data_time: 0.0095 memory: 16133 loss: 0.1309 tflops: 2.9431 tokens_per_sec: 48.6920 +2025/09/26 00:05:18 - mmengine - INFO - Iter(train) [3010/4603] base_lr: 1.6093e-05 lr: 1.6093e-05 eta: 0:30:35 time: 1.0007 data_time: 0.0090 memory: 16133 loss: 1.4844 tflops: 4.7739 tokens_per_sec: 78.9484 +2025/09/26 00:05:29 - mmengine - INFO - Iter(train) [3020/4603] base_lr: 1.6065e-05 lr: 1.6065e-05 eta: 0:30:23 time: 0.9957 data_time: 0.0091 memory: 16133 loss: 
0.0031 tflops: 3.8251 tokens_per_sec: 63.2717 +2025/09/26 00:05:39 - mmengine - INFO - Iter(train) [3030/4603] base_lr: 1.6036e-05 lr: 1.6036e-05 eta: 0:30:11 time: 0.9718 data_time: 0.0072 memory: 16133 loss: 1.0391 tflops: 4.6667 tokens_per_sec: 77.1797 +2025/09/26 00:05:50 - mmengine - INFO - Iter(train) [3040/4603] base_lr: 1.6007e-05 lr: 1.6007e-05 eta: 0:29:59 time: 0.9728 data_time: 0.0091 memory: 16133 loss: 1.7891 tflops: 6.2802 tokens_per_sec: 103.8286 +2025/09/26 00:06:01 - mmengine - INFO - Iter(train) [3050/4603] base_lr: 1.5979e-05 lr: 1.5979e-05 eta: 0:29:47 time: 1.0089 data_time: 0.0088 memory: 16133 loss: 1.8516 tflops: 4.7950 tokens_per_sec: 79.2973 +2025/09/26 00:06:12 - mmengine - INFO - Iter(train) [3060/4603] base_lr: 1.5950e-05 lr: 1.5950e-05 eta: 0:29:35 time: 0.9916 data_time: 0.0088 memory: 16133 loss: 0.3105 tflops: 3.5970 tokens_per_sec: 59.5022 +2025/09/26 00:06:22 - mmengine - INFO - Iter(train) [3070/4603] base_lr: 1.5921e-05 lr: 1.5921e-05 eta: 0:29:23 time: 1.0025 data_time: 0.0087 memory: 16133 loss: 1.8906 tflops: 5.7312 tokens_per_sec: 94.7607 +2025/09/26 00:06:24 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/26 00:06:24 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:06:28 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern, which is characteristic of papillary carcinoma. 
This arrangement is seen in the central portion of the slide, where the papillary structures are most prominent.<|im_end|> + +2025/09/26 00:06:28 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:06:36 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant nuclear pleomorphism and high mitotic activity, suggesting a high-grade malignancy. The tumor is infiltrative, with a high proliferation rate, and is associated with significant vascular invasion and lymphovascular permeation. The presence of tumor cells in the perineural spaces and the absence of vascular invasion in the periphery of the tumor support the aggressive nature of the tumor. The tumor is poorly differentiated, indicating a high-grade malignancy.<|im_end|> + +2025/09/26 00:06:36 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:06:37 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 00:06:50 - mmengine - INFO - Iter(train) [3080/4603] base_lr: 1.5892e-05 lr: 1.5892e-05 eta: 0:29:19 time: 1.0090 data_time: 0.0090 memory: 16133 loss: 1.1641 tflops: 4.0744 tokens_per_sec: 67.3908 +2025/09/26 00:07:00 - mmengine - INFO - Iter(train) [3090/4603] base_lr: 1.5863e-05 lr: 1.5863e-05 eta: 0:29:07 time: 0.9866 data_time: 0.0090 memory: 16133 loss: 1.3203 tflops: 5.0262 tokens_per_sec: 83.1172 +2025/09/26 00:07:11 - mmengine - INFO - Iter(train) [3100/4603] base_lr: 1.5834e-05 lr: 1.5834e-05 eta: 0:28:55 time: 1.0033 data_time: 0.0088 memory: 16133 loss: 0.1572 tflops: 2.8918 tokens_per_sec: 47.8425 +2025/09/26 00:07:22 - mmengine - INFO - Iter(train) [3110/4603] base_lr: 1.5804e-05 lr: 1.5804e-05 eta: 0:28:44 time: 0.9857 data_time: 0.0093 memory: 16133 loss: 0.1455 tflops: 2.9435 tokens_per_sec: 48.6980 +2025/09/26 00:07:33 - mmengine - INFO - Iter(train) [3120/4603] base_lr: 1.5775e-05 lr: 1.5775e-05 eta: 0:28:32 time: 1.0544 data_time: 0.0098 memory: 16133 loss: 0.2715 tflops: 3.2679 tokens_per_sec: 54.0596 +2025/09/26 00:07:43 - mmengine - INFO - Iter(train) [3130/4603] base_lr: 1.5746e-05 lr: 1.5746e-05 eta: 0:28:20 time: 1.2295 data_time: 0.0088 memory: 16133 loss: 0.1167 tflops: 3.6885 tokens_per_sec: 61.0020 +2025/09/26 00:07:54 - mmengine - INFO - Iter(train) [3140/4603] base_lr: 1.5716e-05 lr: 1.5716e-05 eta: 0:28:08 time: 1.0645 data_time: 0.0098 memory: 16133 loss: 0.3418 tflops: 3.1234 tokens_per_sec: 51.6696 +2025/09/26 00:08:04 - mmengine - INFO - Iter(train) [3150/4603] base_lr: 1.5687e-05 lr: 1.5687e-05 eta: 0:27:56 time: 0.9793 data_time: 0.0087 memory: 16133 loss: 0.1543 tflops: 2.9627 tokens_per_sec: 49.0162 +2025/09/26 00:08:15 - mmengine - INFO - 
Iter(train) [3160/4603] base_lr: 1.5657e-05 lr: 1.5657e-05 eta: 0:27:44 time: 1.3106 data_time: 0.0090 memory: 16133 loss: 1.3984 tflops: 4.3839 tokens_per_sec: 72.4837 +2025/09/26 00:08:26 - mmengine - INFO - Iter(train) [3170/4603] base_lr: 1.5628e-05 lr: 1.5628e-05 eta: 0:27:32 time: 1.0121 data_time: 0.0094 memory: 16133 loss: 0.2031 tflops: 2.6872 tokens_per_sec: 44.4606 +2025/09/26 00:08:37 - mmengine - INFO - Iter(train) [3180/4603] base_lr: 1.5598e-05 lr: 1.5598e-05 eta: 0:27:20 time: 1.2986 data_time: 0.0092 memory: 16133 loss: 0.1768 tflops: 2.3274 tokens_per_sec: 38.5041 +2025/09/26 00:08:47 - mmengine - INFO - Iter(train) [3190/4603] base_lr: 1.5568e-05 lr: 1.5568e-05 eta: 0:27:08 time: 1.3249 data_time: 0.0091 memory: 16133 loss: 0.0060 tflops: 2.9661 tokens_per_sec: 49.0614 +2025/09/26 00:08:58 - mmengine - INFO - Iter(train) [3200/4603] base_lr: 1.5538e-05 lr: 1.5538e-05 eta: 0:26:56 time: 1.4828 data_time: 0.0095 memory: 16133 loss: 1.3984 tflops: 3.2624 tokens_per_sec: 53.9522 +2025/09/26 00:09:09 - mmengine - INFO - Iter(train) [3210/4603] base_lr: 1.5508e-05 lr: 1.5508e-05 eta: 0:26:44 time: 0.9880 data_time: 0.0089 memory: 16133 loss: 0.0552 tflops: 4.4063 tokens_per_sec: 72.8756 +2025/09/26 00:09:20 - mmengine - INFO - Iter(train) [3220/4603] base_lr: 1.5478e-05 lr: 1.5478e-05 eta: 0:26:33 time: 0.9761 data_time: 0.0089 memory: 16133 loss: 0.1396 tflops: 2.9723 tokens_per_sec: 49.1755 +2025/09/26 00:09:30 - mmengine - INFO - Iter(train) [3230/4603] base_lr: 1.5448e-05 lr: 1.5448e-05 eta: 0:26:21 time: 1.0213 data_time: 0.0086 memory: 16133 loss: 1.3828 tflops: 4.5589 tokens_per_sec: 75.3960 +2025/09/26 00:09:41 - mmengine - INFO - Iter(train) [3240/4603] base_lr: 1.5418e-05 lr: 1.5418e-05 eta: 0:26:09 time: 1.3057 data_time: 0.0087 memory: 16133 loss: 0.0021 tflops: 3.1487 tokens_per_sec: 52.0789 +2025/09/26 00:09:52 - mmengine - INFO - Iter(train) [3250/4603] base_lr: 1.5388e-05 lr: 1.5388e-05 eta: 0:25:57 time: 1.0039 data_time: 0.0088 
memory: 16133 loss: 0.2393 tflops: 4.0953 tokens_per_sec: 67.7364 +2025/09/26 00:10:02 - mmengine - INFO - Iter(train) [3260/4603] base_lr: 1.5357e-05 lr: 1.5357e-05 eta: 0:25:45 time: 1.0255 data_time: 0.0087 memory: 16133 loss: 0.0050 tflops: 3.7139 tokens_per_sec: 61.4314 +2025/09/26 00:10:14 - mmengine - INFO - Iter(train) [3270/4603] base_lr: 1.5327e-05 lr: 1.5327e-05 eta: 0:25:33 time: 1.3315 data_time: 0.0095 memory: 16133 loss: 1.1094 tflops: 3.9968 tokens_per_sec: 66.0892 +2025/09/26 00:10:25 - mmengine - INFO - Iter(train) [3280/4603] base_lr: 1.5297e-05 lr: 1.5297e-05 eta: 0:25:22 time: 1.3701 data_time: 0.0092 memory: 16133 loss: 1.3281 tflops: 4.2378 tokens_per_sec: 70.0676 +2025/09/26 00:10:35 - mmengine - INFO - Iter(train) [3290/4603] base_lr: 1.5266e-05 lr: 1.5266e-05 eta: 0:25:10 time: 1.0187 data_time: 0.0088 memory: 16133 loss: 0.2021 tflops: 2.8481 tokens_per_sec: 47.1206 +2025/09/26 00:10:46 - mmengine - INFO - Iter(train) [3300/4603] base_lr: 1.5236e-05 lr: 1.5236e-05 eta: 0:24:58 time: 1.3282 data_time: 0.0091 memory: 16133 loss: 1.2812 tflops: 2.7765 tokens_per_sec: 45.9275 +2025/09/26 00:10:57 - mmengine - INFO - Iter(train) [3310/4603] base_lr: 1.5205e-05 lr: 1.5205e-05 eta: 0:24:46 time: 1.2976 data_time: 0.0088 memory: 16133 loss: 1.1719 tflops: 3.6347 tokens_per_sec: 60.1106 +2025/09/26 00:11:07 - mmengine - INFO - Iter(train) [3320/4603] base_lr: 1.5174e-05 lr: 1.5174e-05 eta: 0:24:34 time: 1.0035 data_time: 0.0093 memory: 16133 loss: 2.0781 tflops: 5.4844 tokens_per_sec: 90.6849 +2025/09/26 00:11:18 - mmengine - INFO - Iter(train) [3330/4603] base_lr: 1.5143e-05 lr: 1.5143e-05 eta: 0:24:23 time: 1.0237 data_time: 0.0088 memory: 16133 loss: 2.0781 tflops: 5.0211 tokens_per_sec: 83.0305 +2025/09/26 00:11:29 - mmengine - INFO - Iter(train) [3340/4603] base_lr: 1.5113e-05 lr: 1.5113e-05 eta: 0:24:11 time: 1.0467 data_time: 0.0088 memory: 16133 loss: 0.0869 tflops: 3.2341 tokens_per_sec: 53.5008 +2025/09/26 00:11:39 - mmengine - INFO - 
Iter(train) [3350/4603] base_lr: 1.5082e-05 lr: 1.5082e-05 eta: 0:23:59 time: 1.2037 data_time: 0.0092 memory: 16133 loss: 0.1562 tflops: 2.7119 tokens_per_sec: 44.8634 +2025/09/26 00:11:50 - mmengine - INFO - Iter(train) [3360/4603] base_lr: 1.5051e-05 lr: 1.5051e-05 eta: 0:23:47 time: 0.9809 data_time: 0.0090 memory: 16133 loss: 0.1338 tflops: 3.1427 tokens_per_sec: 51.9926 +2025/09/26 00:12:01 - mmengine - INFO - Iter(train) [3370/4603] base_lr: 1.5020e-05 lr: 1.5020e-05 eta: 0:23:36 time: 1.2019 data_time: 0.0089 memory: 16133 loss: 1.4141 tflops: 3.5717 tokens_per_sec: 59.0737 +2025/09/26 00:12:12 - mmengine - INFO - Iter(train) [3380/4603] base_lr: 1.4989e-05 lr: 1.4989e-05 eta: 0:23:24 time: 0.9920 data_time: 0.0091 memory: 16133 loss: 0.6328 tflops: 2.8636 tokens_per_sec: 47.3769 +2025/09/26 00:12:23 - mmengine - INFO - Iter(train) [3390/4603] base_lr: 1.4957e-05 lr: 1.4957e-05 eta: 0:23:12 time: 1.0168 data_time: 0.0092 memory: 16133 loss: 0.1318 tflops: 3.6268 tokens_per_sec: 59.9923 +2025/09/26 00:12:34 - mmengine - INFO - Iter(train) [3400/4603] base_lr: 1.4926e-05 lr: 1.4926e-05 eta: 0:23:00 time: 0.9841 data_time: 0.0088 memory: 16133 loss: 1.4453 tflops: 4.7310 tokens_per_sec: 78.2421 +2025/09/26 00:12:44 - mmengine - INFO - Iter(train) [3410/4603] base_lr: 1.4895e-05 lr: 1.4895e-05 eta: 0:22:49 time: 0.9908 data_time: 0.0085 memory: 16133 loss: 1.0312 tflops: 4.8211 tokens_per_sec: 79.7300 +2025/09/26 00:12:55 - mmengine - INFO - Iter(train) [3420/4603] base_lr: 1.4864e-05 lr: 1.4864e-05 eta: 0:22:37 time: 1.3197 data_time: 0.0087 memory: 16133 loss: 0.0010 tflops: 2.9778 tokens_per_sec: 49.2551 +2025/09/26 00:13:06 - mmengine - INFO - Iter(train) [3430/4603] base_lr: 1.4832e-05 lr: 1.4832e-05 eta: 0:22:25 time: 0.9851 data_time: 0.0088 memory: 16133 loss: 1.4688 tflops: 5.4026 tokens_per_sec: 89.3348 +2025/09/26 00:13:16 - mmengine - INFO - Iter(train) [3440/4603] base_lr: 1.4801e-05 lr: 1.4801e-05 eta: 0:22:13 time: 0.9856 data_time: 0.0087 
memory: 16133 loss: 2.0000 tflops: 6.2595 tokens_per_sec: 103.4863 +2025/09/26 00:13:27 - mmengine - INFO - Iter(train) [3450/4603] base_lr: 1.4769e-05 lr: 1.4769e-05 eta: 0:22:02 time: 0.9948 data_time: 0.0093 memory: 16133 loss: 1.2500 tflops: 5.9586 tokens_per_sec: 98.5157 +2025/09/26 00:13:38 - mmengine - INFO - Iter(train) [3460/4603] base_lr: 1.4738e-05 lr: 1.4738e-05 eta: 0:21:50 time: 1.0002 data_time: 0.0094 memory: 16133 loss: 1.8516 tflops: 4.6551 tokens_per_sec: 76.9863 +2025/09/26 00:13:49 - mmengine - INFO - Iter(train) [3470/4603] base_lr: 1.4706e-05 lr: 1.4706e-05 eta: 0:21:38 time: 0.9985 data_time: 0.0089 memory: 16133 loss: 0.1357 tflops: 2.9056 tokens_per_sec: 48.0719 +2025/09/26 00:13:59 - mmengine - INFO - Iter(train) [3480/4603] base_lr: 1.4674e-05 lr: 1.4674e-05 eta: 0:21:27 time: 1.0127 data_time: 0.0085 memory: 16133 loss: 1.1016 tflops: 4.5378 tokens_per_sec: 75.0473 +2025/09/26 00:14:10 - mmengine - INFO - Iter(train) [3490/4603] base_lr: 1.4642e-05 lr: 1.4642e-05 eta: 0:21:15 time: 0.9830 data_time: 0.0090 memory: 16133 loss: 0.1299 tflops: 3.1975 tokens_per_sec: 52.8974 +2025/09/26 00:14:21 - mmengine - INFO - Iter(train) [3500/4603] base_lr: 1.4610e-05 lr: 1.4610e-05 eta: 0:21:03 time: 1.0022 data_time: 0.0088 memory: 16133 loss: 1.2422 tflops: 4.4647 tokens_per_sec: 73.8406 +2025/09/26 00:14:32 - mmengine - INFO - Iter(train) [3510/4603] base_lr: 1.4579e-05 lr: 1.4579e-05 eta: 0:20:52 time: 1.3369 data_time: 0.0104 memory: 16133 loss: 0.0344 tflops: 2.7132 tokens_per_sec: 44.8810 +2025/09/26 00:14:43 - mmengine - INFO - Iter(train) [3520/4603] base_lr: 1.4547e-05 lr: 1.4547e-05 eta: 0:20:40 time: 1.2725 data_time: 0.0089 memory: 16133 loss: 0.2285 tflops: 2.9932 tokens_per_sec: 49.5105 +2025/09/26 00:14:53 - mmengine - INFO - Iter(train) [3530/4603] base_lr: 1.4515e-05 lr: 1.4515e-05 eta: 0:20:28 time: 1.2463 data_time: 0.0086 memory: 16133 loss: 0.9414 tflops: 3.7357 tokens_per_sec: 61.7806 +2025/09/26 00:15:04 - mmengine - INFO - 
Iter(train) [3540/4603] base_lr: 1.4483e-05 lr: 1.4483e-05 eta: 0:20:17 time: 1.2745 data_time: 0.0089 memory: 16133 loss: 0.0601 tflops: 4.2231 tokens_per_sec: 69.8308 +2025/09/26 00:15:14 - mmengine - INFO - Iter(train) [3550/4603] base_lr: 1.4450e-05 lr: 1.4450e-05 eta: 0:20:05 time: 1.0097 data_time: 0.0093 memory: 16133 loss: 1.6406 tflops: 4.3714 tokens_per_sec: 72.2978 +2025/09/26 00:15:25 - mmengine - INFO - Iter(train) [3560/4603] base_lr: 1.4418e-05 lr: 1.4418e-05 eta: 0:19:53 time: 1.4085 data_time: 0.0096 memory: 16133 loss: 0.1187 tflops: 2.1886 tokens_per_sec: 36.2075 +2025/09/26 00:15:36 - mmengine - INFO - Iter(train) [3570/4603] base_lr: 1.4386e-05 lr: 1.4386e-05 eta: 0:19:42 time: 1.0603 data_time: 0.0090 memory: 16133 loss: 0.3281 tflops: 2.5082 tokens_per_sec: 41.4995 +2025/09/26 00:15:47 - mmengine - INFO - Iter(train) [3580/4603] base_lr: 1.4354e-05 lr: 1.4354e-05 eta: 0:19:30 time: 1.0076 data_time: 0.0092 memory: 16133 loss: 3.2500 tflops: 3.9603 tokens_per_sec: 65.5051 +2025/09/26 00:15:51 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/26 00:15:51 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:15:56 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. This pattern is characterized by the formation of distinct lobules, each containing a cluster of tumor cells. 
The lobules are separated by fibrous tissue, creating a well-defined structure within the slide.<|im_end|> + +2025/09/26 00:15:56 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:16:08 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and cellular atypia. The tumor exhibits a high degree of nuclear pleomorphism and marked mitotic activity, suggesting a high-grade malignancy. The presence of vascular invasion and lymphovascular emboli underscores the aggressive nature of the tumor. The absence of lymphovascular invasion and perineural involvement is noted, indicating a lower risk of distant metastasis. The tumor is characterized by a high nuclear grade, reflecting a high-grade malignancy. The absence of specific molecular markers such as EGFR, HER2, and p53 indicates a non-specific genetic profile. The overall prognosis is poor, reflecting the aggressive nature of the tumor and the high likelihood of metastasis.<|im_end|> + +2025/09/26 00:16:08 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:16:09 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 00:16:16 - mmengine - INFO - Iter(train) [3590/4603] base_lr: 1.4321e-05 lr: 1.4321e-05 eta: 0:19:24 time: 1.0206 data_time: 0.0091 memory: 16133 loss: 0.4883 tflops: 2.6057 tokens_per_sec: 43.1124 +2025/09/26 00:16:27 - mmengine - INFO - Iter(train) [3600/4603] base_lr: 1.4289e-05 lr: 1.4289e-05 eta: 0:19:12 time: 1.0277 data_time: 0.0092 memory: 16133 loss: 0.0028 tflops: 5.0605 tokens_per_sec: 83.6811 +2025/09/26 00:16:38 - mmengine - INFO - Iter(train) [3610/4603] base_lr: 1.4256e-05 lr: 1.4256e-05 eta: 0:19:00 time: 1.2763 data_time: 0.0088 memory: 16133 loss: 0.0344 tflops: 3.3161 tokens_per_sec: 54.8465 +2025/09/26 00:16:48 - mmengine - INFO - Iter(train) [3620/4603] base_lr: 1.4224e-05 lr: 1.4224e-05 eta: 0:18:48 time: 1.2333 data_time: 0.0089 memory: 16133 loss: 2.4219 tflops: 3.2844 tokens_per_sec: 54.3252 +2025/09/26 00:16:59 - mmengine - INFO - Iter(train) [3630/4603] base_lr: 1.4191e-05 lr: 1.4191e-05 eta: 0:18:37 time: 0.9884 data_time: 0.0094 memory: 16133 loss: 1.0234 tflops: 4.8941 tokens_per_sec: 80.9357 +2025/09/26 00:17:10 - mmengine - INFO - Iter(train) [3640/4603] base_lr: 1.4159e-05 lr: 1.4159e-05 eta: 0:18:25 time: 1.0117 data_time: 0.0091 memory: 16133 loss: 0.0042 tflops: 4.1835 tokens_per_sec: 69.1932 +2025/09/26 00:17:21 - mmengine - INFO - Iter(train) [3650/4603] base_lr: 1.4126e-05 lr: 1.4126e-05 eta: 0:18:14 time: 1.3339 data_time: 0.0088 memory: 16133 loss: 0.0065 tflops: 3.1730 tokens_per_sec: 52.4794 +2025/09/26 00:17:32 - mmengine - INFO - Iter(train) [3660/4603] base_lr: 1.4093e-05 lr: 1.4093e-05 eta: 0:18:02 time: 1.0064 data_time: 0.0091 memory: 16133 loss: 1.0859 tflops: 4.6863 tokens_per_sec: 77.5015 +2025/09/26 00:17:42 - mmengine - INFO - 
Iter(train) [3670/4603] base_lr: 1.4060e-05 lr: 1.4060e-05 eta: 0:17:50 time: 0.9896 data_time: 0.0091 memory: 16134 loss: 0.1641 tflops: 3.6041 tokens_per_sec: 59.6193 +2025/09/26 00:17:52 - mmengine - INFO - Iter(train) [3680/4603] base_lr: 1.4028e-05 lr: 1.4028e-05 eta: 0:17:38 time: 1.0157 data_time: 0.0087 memory: 16133 loss: 0.1846 tflops: 3.0352 tokens_per_sec: 50.2129 +2025/09/26 00:18:03 - mmengine - INFO - Iter(train) [3690/4603] base_lr: 1.3995e-05 lr: 1.3995e-05 eta: 0:17:27 time: 0.9776 data_time: 0.0102 memory: 16133 loss: 0.0942 tflops: 3.7105 tokens_per_sec: 61.3775 +2025/09/26 00:18:13 - mmengine - INFO - Iter(train) [3700/4603] base_lr: 1.3962e-05 lr: 1.3962e-05 eta: 0:17:15 time: 1.0172 data_time: 0.0095 memory: 16133 loss: 0.8242 tflops: 5.1721 tokens_per_sec: 85.5259 +2025/09/26 00:18:24 - mmengine - INFO - Iter(train) [3710/4603] base_lr: 1.3929e-05 lr: 1.3929e-05 eta: 0:17:03 time: 1.3929 data_time: 0.0095 memory: 16133 loss: 0.1338 tflops: 2.0829 tokens_per_sec: 34.4608 +2025/09/26 00:18:36 - mmengine - INFO - Iter(train) [3720/4603] base_lr: 1.3896e-05 lr: 1.3896e-05 eta: 0:16:52 time: 1.0100 data_time: 0.0094 memory: 16133 loss: 1.1328 tflops: 4.6096 tokens_per_sec: 76.2344 +2025/09/26 00:18:46 - mmengine - INFO - Iter(train) [3730/4603] base_lr: 1.3862e-05 lr: 1.3862e-05 eta: 0:16:40 time: 0.9841 data_time: 0.0090 memory: 16133 loss: 0.1299 tflops: 2.9481 tokens_per_sec: 48.7754 +2025/09/26 00:18:57 - mmengine - INFO - Iter(train) [3740/4603] base_lr: 1.3829e-05 lr: 1.3829e-05 eta: 0:16:28 time: 1.0322 data_time: 0.0091 memory: 16133 loss: 0.3613 tflops: 2.9866 tokens_per_sec: 49.4098 +2025/09/26 00:19:08 - mmengine - INFO - Iter(train) [3750/4603] base_lr: 1.3796e-05 lr: 1.3796e-05 eta: 0:16:17 time: 1.2416 data_time: 0.0091 memory: 16133 loss: 1.4062 tflops: 4.5301 tokens_per_sec: 74.9037 +2025/09/26 00:19:18 - mmengine - INFO - Iter(train) [3760/4603] base_lr: 1.3763e-05 lr: 1.3763e-05 eta: 0:16:05 time: 1.0049 data_time: 0.0090 
memory: 16133 loss: 1.8984 tflops: 7.5264 tokens_per_sec: 124.3926 +2025/09/26 00:19:29 - mmengine - INFO - Iter(train) [3770/4603] base_lr: 1.3730e-05 lr: 1.3730e-05 eta: 0:15:54 time: 1.3466 data_time: 0.0089 memory: 16133 loss: 0.1865 tflops: 2.0647 tokens_per_sec: 34.1605 +2025/09/26 00:19:40 - mmengine - INFO - Iter(train) [3780/4603] base_lr: 1.3696e-05 lr: 1.3696e-05 eta: 0:15:42 time: 1.0058 data_time: 0.0090 memory: 16133 loss: 0.0087 tflops: 3.7264 tokens_per_sec: 61.6404 +2025/09/26 00:19:50 - mmengine - INFO - Iter(train) [3790/4603] base_lr: 1.3663e-05 lr: 1.3663e-05 eta: 0:15:30 time: 1.2341 data_time: 0.0090 memory: 16133 loss: 2.0000 tflops: 4.5576 tokens_per_sec: 75.3579 +2025/09/26 00:20:01 - mmengine - INFO - Iter(train) [3800/4603] base_lr: 1.3629e-05 lr: 1.3629e-05 eta: 0:15:19 time: 1.3312 data_time: 0.0092 memory: 16133 loss: 1.3672 tflops: 2.9520 tokens_per_sec: 48.8283 +2025/09/26 00:20:12 - mmengine - INFO - Iter(train) [3810/4603] base_lr: 1.3596e-05 lr: 1.3596e-05 eta: 0:15:07 time: 1.0090 data_time: 0.0088 memory: 16133 loss: 0.2139 tflops: 2.8754 tokens_per_sec: 47.5714 +2025/09/26 00:20:22 - mmengine - INFO - Iter(train) [3820/4603] base_lr: 1.3562e-05 lr: 1.3562e-05 eta: 0:14:56 time: 0.9852 data_time: 0.0095 memory: 16133 loss: 1.2266 tflops: 5.4019 tokens_per_sec: 89.3247 +2025/09/26 00:20:33 - mmengine - INFO - Iter(train) [3830/4603] base_lr: 1.3529e-05 lr: 1.3529e-05 eta: 0:14:44 time: 0.9838 data_time: 0.0089 memory: 16133 loss: 0.0020 tflops: 3.8098 tokens_per_sec: 63.0184 +2025/09/26 00:20:44 - mmengine - INFO - Iter(train) [3840/4603] base_lr: 1.3495e-05 lr: 1.3495e-05 eta: 0:14:32 time: 1.2704 data_time: 0.0090 memory: 16133 loss: 1.1562 tflops: 4.4752 tokens_per_sec: 73.9947 +2025/09/26 00:20:54 - mmengine - INFO - Iter(train) [3850/4603] base_lr: 1.3461e-05 lr: 1.3461e-05 eta: 0:14:21 time: 1.0034 data_time: 0.0092 memory: 16133 loss: 0.0620 tflops: 4.7006 tokens_per_sec: 77.7371 +2025/09/26 00:21:05 - mmengine - INFO - 
Iter(train) [3860/4603] base_lr: 1.3428e-05 lr: 1.3428e-05 eta: 0:14:09 time: 0.9923 data_time: 0.0092 memory: 16133 loss: 1.5312 tflops: 4.1433 tokens_per_sec: 68.5304 +2025/09/26 00:21:16 - mmengine - INFO - Iter(train) [3870/4603] base_lr: 1.3394e-05 lr: 1.3394e-05 eta: 0:13:58 time: 1.2091 data_time: 0.0089 memory: 16133 loss: 1.4766 tflops: 4.2012 tokens_per_sec: 69.4730 +2025/09/26 00:21:26 - mmengine - INFO - Iter(train) [3880/4603] base_lr: 1.3360e-05 lr: 1.3360e-05 eta: 0:13:46 time: 1.1507 data_time: 0.0087 memory: 16133 loss: 0.0255 tflops: 3.5202 tokens_per_sec: 58.2243 +2025/09/26 00:21:37 - mmengine - INFO - Iter(train) [3890/4603] base_lr: 1.3326e-05 lr: 1.3326e-05 eta: 0:13:35 time: 1.1529 data_time: 0.0087 memory: 16133 loss: 0.1914 tflops: 2.8313 tokens_per_sec: 46.8382 +2025/09/26 00:21:48 - mmengine - INFO - Iter(train) [3900/4603] base_lr: 1.3292e-05 lr: 1.3292e-05 eta: 0:13:23 time: 0.9814 data_time: 0.0091 memory: 16133 loss: 0.1436 tflops: 3.4495 tokens_per_sec: 57.0636 +2025/09/26 00:21:58 - mmengine - INFO - Iter(train) [3910/4603] base_lr: 1.3258e-05 lr: 1.3258e-05 eta: 0:13:11 time: 1.2441 data_time: 0.0091 memory: 16133 loss: 1.0547 tflops: 3.5965 tokens_per_sec: 59.4822 +2025/09/26 00:22:09 - mmengine - INFO - Iter(train) [3920/4603] base_lr: 1.3224e-05 lr: 1.3224e-05 eta: 0:13:00 time: 0.9862 data_time: 0.0088 memory: 16133 loss: 0.2275 tflops: 3.1873 tokens_per_sec: 52.7298 +2025/09/26 00:22:20 - mmengine - INFO - Iter(train) [3930/4603] base_lr: 1.3190e-05 lr: 1.3190e-05 eta: 0:12:48 time: 1.0150 data_time: 0.0092 memory: 16133 loss: 0.9297 tflops: 5.0643 tokens_per_sec: 83.7452 +2025/09/26 00:22:30 - mmengine - INFO - Iter(train) [3940/4603] base_lr: 1.3156e-05 lr: 1.3156e-05 eta: 0:12:37 time: 1.0019 data_time: 0.0091 memory: 16133 loss: 1.0156 tflops: 4.0432 tokens_per_sec: 66.8763 +2025/09/26 00:22:41 - mmengine - INFO - Iter(train) [3950/4603] base_lr: 1.3122e-05 lr: 1.3122e-05 eta: 0:12:25 time: 1.0116 data_time: 0.0090 
memory: 16133 loss: 0.0117 tflops: 4.1239 tokens_per_sec: 68.2090 +2025/09/26 00:22:51 - mmengine - INFO - Iter(train) [3960/4603] base_lr: 1.3088e-05 lr: 1.3088e-05 eta: 0:12:14 time: 1.0011 data_time: 0.0096 memory: 16133 loss: 0.1387 tflops: 2.8981 tokens_per_sec: 47.9476 +2025/09/26 00:23:03 - mmengine - INFO - Iter(train) [3970/4603] base_lr: 1.3054e-05 lr: 1.3054e-05 eta: 0:12:02 time: 1.0106 data_time: 0.0098 memory: 16133 loss: 0.9492 tflops: 4.4274 tokens_per_sec: 73.2232 +2025/09/26 00:23:13 - mmengine - INFO - Iter(train) [3980/4603] base_lr: 1.3020e-05 lr: 1.3020e-05 eta: 0:11:51 time: 0.9831 data_time: 0.0093 memory: 16133 loss: 0.0327 tflops: 4.7976 tokens_per_sec: 79.3425 +2025/09/26 00:23:23 - mmengine - INFO - Iter(train) [3990/4603] base_lr: 1.2985e-05 lr: 1.2985e-05 eta: 0:11:39 time: 1.0226 data_time: 0.0086 memory: 16133 loss: 0.0143 tflops: 4.0202 tokens_per_sec: 66.4947 +2025/09/26 00:23:35 - mmengine - INFO - Exp name: temp_config_stage_2b_20250925_230352 +2025/09/26 00:23:35 - mmengine - INFO - Iter(train) [4000/4603] base_lr: 1.2951e-05 lr: 1.2951e-05 eta: 0:11:28 time: 1.0054 data_time: 0.0089 memory: 16133 loss: 1.8047 tflops: 4.0890 tokens_per_sec: 67.6327 +2025/09/26 00:23:45 - mmengine - INFO - Iter(train) [4010/4603] base_lr: 1.2917e-05 lr: 1.2917e-05 eta: 0:11:16 time: 1.0038 data_time: 0.0093 memory: 16133 loss: 1.2969 tflops: 5.5431 tokens_per_sec: 91.6536 +2025/09/26 00:23:55 - mmengine - INFO - Iter(train) [4020/4603] base_lr: 1.2882e-05 lr: 1.2882e-05 eta: 0:11:04 time: 0.9890 data_time: 0.0087 memory: 16133 loss: 1.0859 tflops: 4.7077 tokens_per_sec: 77.8558 +2025/09/26 00:24:06 - mmengine - INFO - Iter(train) [4030/4603] base_lr: 1.2848e-05 lr: 1.2848e-05 eta: 0:10:53 time: 0.9968 data_time: 0.0088 memory: 16133 loss: 1.0859 tflops: 5.3996 tokens_per_sec: 89.2854 +2025/09/26 00:24:17 - mmengine - INFO - Iter(train) [4040/4603] base_lr: 1.2814e-05 lr: 1.2814e-05 eta: 0:10:41 time: 0.9872 data_time: 0.0091 memory: 16133 loss: 
1.3906 tflops: 4.4709 tokens_per_sec: 73.9446 +2025/09/26 00:24:27 - mmengine - INFO - Iter(train) [4050/4603] base_lr: 1.2779e-05 lr: 1.2779e-05 eta: 0:10:30 time: 0.9902 data_time: 0.0090 memory: 16133 loss: 0.6602 tflops: 3.7853 tokens_per_sec: 62.6131 +2025/09/26 00:24:38 - mmengine - INFO - Iter(train) [4060/4603] base_lr: 1.2745e-05 lr: 1.2745e-05 eta: 0:10:18 time: 0.9813 data_time: 0.0091 memory: 16133 loss: 0.1758 tflops: 3.0181 tokens_per_sec: 49.9322 +2025/09/26 00:24:49 - mmengine - INFO - Iter(train) [4070/4603] base_lr: 1.2710e-05 lr: 1.2710e-05 eta: 0:10:07 time: 0.9898 data_time: 0.0090 memory: 16133 loss: 0.1758 tflops: 2.9924 tokens_per_sec: 49.5061 +2025/09/26 00:24:59 - mmengine - INFO - Iter(train) [4080/4603] base_lr: 1.2675e-05 lr: 1.2675e-05 eta: 0:09:55 time: 0.9845 data_time: 0.0091 memory: 16133 loss: 0.0435 tflops: 3.9302 tokens_per_sec: 65.0097 +2025/09/26 00:25:10 - mmengine - INFO - Iter(train) [4090/4603] base_lr: 1.2641e-05 lr: 1.2641e-05 eta: 0:09:44 time: 0.9974 data_time: 0.0091 memory: 16133 loss: 0.1270 tflops: 4.3041 tokens_per_sec: 71.1871 +2025/09/26 00:25:17 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/26 00:25:17 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:25:21 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. This pattern is characterized by the formation of distinct lobules, each containing a cluster of tumor cells. 
The lobules are separated by fibrous stroma, contributing to the overall architectural organization of the tumor.<|im_end|> + +2025/09/26 00:25:21 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:25:31 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and nuclear atypia. The tumor exhibits a high degree of pleomorphism, with marked nuclear atypia and prominent nucleoli. The presence of microvascular invasion and lymphovascular invasion is noted, along with the involvement of the perineural space. The tumor is characterized by a high mitotic rate, with a high Ki-67 proliferation index, reflecting a high-grade malignancy. The absence of specific molecular markers such as EGFR, HER2, and p53 further supports the aggressive nature of the tumor.<|im_end|> + +2025/09/26 00:25:31 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:25:32 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 00:25:32 - mmengine - INFO - Saving checkpoint at 4096 iterations +2025/09/26 00:25:42 - mmengine - INFO - Iter(train) [4100/4603] base_lr: 1.2606e-05 lr: 1.2606e-05 eta: 0:09:35 time: 0.9916 data_time: 0.0091 memory: 16133 loss: 1.5000 tflops: 4.5124 tokens_per_sec: 74.6286 +2025/09/26 00:25:53 - mmengine - INFO - Iter(train) [4110/4603] base_lr: 1.2571e-05 lr: 1.2571e-05 eta: 0:09:24 time: 1.0111 data_time: 0.0094 memory: 16133 loss: 1.6484 tflops: 4.1857 tokens_per_sec: 69.2294 +2025/09/26 00:26:04 - mmengine - INFO - Iter(train) [4120/4603] base_lr: 1.2537e-05 lr: 1.2537e-05 eta: 0:09:12 time: 0.9987 data_time: 0.0091 memory: 16133 loss: 0.0099 tflops: 4.8439 tokens_per_sec: 80.1053 +2025/09/26 00:26:15 - mmengine - INFO - Iter(train) [4130/4603] base_lr: 1.2502e-05 lr: 1.2502e-05 eta: 0:09:01 time: 1.0123 data_time: 0.0092 memory: 16133 loss: 0.0532 tflops: 4.0611 tokens_per_sec: 67.1714 +2025/09/26 00:26:26 - mmengine - INFO - Iter(train) [4140/4603] base_lr: 1.2467e-05 lr: 1.2467e-05 eta: 0:08:49 time: 1.0614 data_time: 0.0100 memory: 16133 loss: 0.0042 tflops: 4.5007 tokens_per_sec: 74.4304 +2025/09/26 00:26:37 - mmengine - INFO - Iter(train) [4150/4603] base_lr: 1.2432e-05 lr: 1.2432e-05 eta: 0:08:38 time: 1.0191 data_time: 0.0091 memory: 16133 loss: 1.6328 tflops: 4.6281 tokens_per_sec: 76.5380 +2025/09/26 00:26:48 - mmengine - INFO - Iter(train) [4160/4603] base_lr: 1.2397e-05 lr: 1.2397e-05 eta: 0:08:26 time: 1.6104 data_time: 0.0089 memory: 16136 loss: 1.6094 tflops: 4.2826 tokens_per_sec: 70.7907 +2025/09/26 00:26:59 - mmengine - INFO - Iter(train) [4170/4603] base_lr: 1.2363e-05 lr: 1.2363e-05 eta: 0:08:15 time: 1.0015 data_time: 0.0089 memory: 16133 loss: 0.0258 tflops: 
5.3138 tokens_per_sec: 87.8676 +2025/09/26 00:27:10 - mmengine - INFO - Iter(train) [4180/4603] base_lr: 1.2328e-05 lr: 1.2328e-05 eta: 0:08:03 time: 1.2856 data_time: 0.0091 memory: 16133 loss: 1.3984 tflops: 3.1037 tokens_per_sec: 51.3375 +2025/09/26 00:27:20 - mmengine - INFO - Iter(train) [4190/4603] base_lr: 1.2293e-05 lr: 1.2293e-05 eta: 0:07:52 time: 1.3148 data_time: 0.0089 memory: 16133 loss: 2.2969 tflops: 4.6463 tokens_per_sec: 76.8162 +2025/09/26 00:27:32 - mmengine - INFO - Iter(train) [4200/4603] base_lr: 1.2258e-05 lr: 1.2258e-05 eta: 0:07:40 time: 1.0084 data_time: 0.0096 memory: 16133 loss: 0.0070 tflops: 4.4369 tokens_per_sec: 73.3810 +2025/09/26 00:27:42 - mmengine - INFO - Iter(train) [4210/4603] base_lr: 1.2223e-05 lr: 1.2223e-05 eta: 0:07:29 time: 1.0134 data_time: 0.0092 memory: 16133 loss: 1.7188 tflops: 5.5503 tokens_per_sec: 91.7715 +2025/09/26 00:27:53 - mmengine - INFO - Iter(train) [4220/4603] base_lr: 1.2188e-05 lr: 1.2188e-05 eta: 0:07:17 time: 1.2285 data_time: 0.0095 memory: 16133 loss: 0.2012 tflops: 3.6423 tokens_per_sec: 60.2385 +2025/09/26 00:28:04 - mmengine - INFO - Iter(train) [4230/4603] base_lr: 1.2153e-05 lr: 1.2153e-05 eta: 0:07:06 time: 0.9663 data_time: 0.0093 memory: 16133 loss: 0.8750 tflops: 5.0061 tokens_per_sec: 82.7874 +2025/09/26 00:28:15 - mmengine - INFO - Iter(train) [4240/4603] base_lr: 1.2118e-05 lr: 1.2118e-05 eta: 0:06:54 time: 0.9997 data_time: 0.0094 memory: 16133 loss: 1.2422 tflops: 3.6283 tokens_per_sec: 60.0180 +2025/09/26 00:28:25 - mmengine - INFO - Iter(train) [4250/4603] base_lr: 1.2082e-05 lr: 1.2082e-05 eta: 0:06:43 time: 1.0196 data_time: 0.0092 memory: 16133 loss: 1.2109 tflops: 4.0323 tokens_per_sec: 66.6949 +2025/09/26 00:28:36 - mmengine - INFO - Iter(train) [4260/4603] base_lr: 1.2047e-05 lr: 1.2047e-05 eta: 0:06:31 time: 1.0318 data_time: 0.0093 memory: 16133 loss: 1.4609 tflops: 4.2192 tokens_per_sec: 69.7813 +2025/09/26 00:28:47 - mmengine - INFO - Iter(train) [4270/4603] base_lr: 
1.2012e-05 lr: 1.2012e-05 eta: 0:06:20 time: 0.9861 data_time: 0.0094 memory: 16133 loss: 0.1553 tflops: 3.3715 tokens_per_sec: 55.7747 +2025/09/26 00:28:58 - mmengine - INFO - Iter(train) [4280/4603] base_lr: 1.1977e-05 lr: 1.1977e-05 eta: 0:06:08 time: 1.2194 data_time: 0.0096 memory: 16133 loss: 0.1416 tflops: 2.3793 tokens_per_sec: 39.3640 +2025/09/26 00:29:08 - mmengine - INFO - Iter(train) [4290/4603] base_lr: 1.1942e-05 lr: 1.1942e-05 eta: 0:05:57 time: 1.0118 data_time: 0.0091 memory: 16133 loss: 0.9961 tflops: 4.9604 tokens_per_sec: 82.0292 +2025/09/26 00:29:19 - mmengine - INFO - Iter(train) [4300/4603] base_lr: 1.1906e-05 lr: 1.1906e-05 eta: 0:05:45 time: 1.3657 data_time: 0.0096 memory: 16133 loss: 0.1436 tflops: 2.1244 tokens_per_sec: 35.1471 +2025/09/26 00:29:30 - mmengine - INFO - Iter(train) [4310/4603] base_lr: 1.1871e-05 lr: 1.1871e-05 eta: 0:05:34 time: 0.9838 data_time: 0.0089 memory: 16133 loss: 0.7773 tflops: 5.1632 tokens_per_sec: 85.3808 +2025/09/26 00:29:40 - mmengine - INFO - Iter(train) [4320/4603] base_lr: 1.1836e-05 lr: 1.1836e-05 eta: 0:05:22 time: 0.9758 data_time: 0.0090 memory: 16133 loss: 1.1562 tflops: 4.6475 tokens_per_sec: 76.8633 +2025/09/26 00:29:51 - mmengine - INFO - Iter(train) [4330/4603] base_lr: 1.1801e-05 lr: 1.1801e-05 eta: 0:05:11 time: 0.9888 data_time: 0.0092 memory: 16133 loss: 1.1250 tflops: 3.6681 tokens_per_sec: 60.6777 +2025/09/26 00:30:01 - mmengine - INFO - Iter(train) [4340/4603] base_lr: 1.1765e-05 lr: 1.1765e-05 eta: 0:05:00 time: 0.9965 data_time: 0.0095 memory: 16133 loss: 0.9844 tflops: 2.8508 tokens_per_sec: 47.1658 +2025/09/26 00:30:12 - mmengine - INFO - Iter(train) [4350/4603] base_lr: 1.1730e-05 lr: 1.1730e-05 eta: 0:04:48 time: 0.9955 data_time: 0.0090 memory: 16133 loss: 1.1016 tflops: 5.1634 tokens_per_sec: 85.3836 +2025/09/26 00:30:23 - mmengine - INFO - Iter(train) [4360/4603] base_lr: 1.1694e-05 lr: 1.1694e-05 eta: 0:04:37 time: 0.9829 data_time: 0.0093 memory: 16133 loss: 1.1641 tflops: 
3.8134 tokens_per_sec: 63.0780 +2025/09/26 00:30:34 - mmengine - INFO - Iter(train) [4370/4603] base_lr: 1.1659e-05 lr: 1.1659e-05 eta: 0:04:25 time: 1.3418 data_time: 0.0092 memory: 16133 loss: 1.1719 tflops: 4.6433 tokens_per_sec: 76.7646 +2025/09/26 00:30:44 - mmengine - INFO - Iter(train) [4380/4603] base_lr: 1.1624e-05 lr: 1.1624e-05 eta: 0:04:14 time: 1.0479 data_time: 0.0095 memory: 16133 loss: 0.1279 tflops: 3.4614 tokens_per_sec: 57.2579 +2025/09/26 00:30:55 - mmengine - INFO - Iter(train) [4390/4603] base_lr: 1.1588e-05 lr: 1.1588e-05 eta: 0:04:02 time: 1.0081 data_time: 0.0092 memory: 16133 loss: 1.1094 tflops: 4.3785 tokens_per_sec: 72.4149 +2025/09/26 00:31:06 - mmengine - INFO - Iter(train) [4400/4603] base_lr: 1.1553e-05 lr: 1.1553e-05 eta: 0:03:51 time: 1.1679 data_time: 0.0093 memory: 16133 loss: 0.0452 tflops: 3.4683 tokens_per_sec: 57.3658 +2025/09/26 00:31:17 - mmengine - INFO - Iter(train) [4410/4603] base_lr: 1.1517e-05 lr: 1.1517e-05 eta: 0:03:39 time: 0.9980 data_time: 0.0089 memory: 16133 loss: 0.1475 tflops: 2.9070 tokens_per_sec: 48.0943 +2025/09/26 00:31:28 - mmengine - INFO - Iter(train) [4420/4603] base_lr: 1.1482e-05 lr: 1.1482e-05 eta: 0:03:28 time: 0.9995 data_time: 0.0092 memory: 16133 loss: 2.5938 tflops: 5.4454 tokens_per_sec: 90.0415 +2025/09/26 00:31:39 - mmengine - INFO - Iter(train) [4430/4603] base_lr: 1.1446e-05 lr: 1.1446e-05 eta: 0:03:17 time: 1.0021 data_time: 0.0090 memory: 16133 loss: 0.1875 tflops: 2.6539 tokens_per_sec: 43.9089 +2025/09/26 00:31:50 - mmengine - INFO - Iter(train) [4440/4603] base_lr: 1.1411e-05 lr: 1.1411e-05 eta: 0:03:05 time: 1.3410 data_time: 0.0076 memory: 16133 loss: 0.0234 tflops: 3.2463 tokens_per_sec: 53.6906 +2025/09/26 00:32:00 - mmengine - INFO - Iter(train) [4450/4603] base_lr: 1.1375e-05 lr: 1.1375e-05 eta: 0:02:54 time: 1.0109 data_time: 0.0107 memory: 16133 loss: 0.0767 tflops: 3.8874 tokens_per_sec: 64.3004 +2025/09/26 00:32:11 - mmengine - INFO - Iter(train) [4460/4603] base_lr: 
1.1339e-05 lr: 1.1339e-05 eta: 0:02:42 time: 1.0368 data_time: 0.0090 memory: 16133 loss: 1.2031 tflops: 4.7240 tokens_per_sec: 78.1224 +2025/09/26 00:32:22 - mmengine - INFO - Iter(train) [4470/4603] base_lr: 1.1304e-05 lr: 1.1304e-05 eta: 0:02:31 time: 0.9963 data_time: 0.0093 memory: 16133 loss: 1.2578 tflops: 5.2808 tokens_per_sec: 87.3219 +2025/09/26 00:32:33 - mmengine - INFO - Iter(train) [4480/4603] base_lr: 1.1268e-05 lr: 1.1268e-05 eta: 0:02:20 time: 1.6541 data_time: 0.0090 memory: 16133 loss: 1.3984 tflops: 3.6932 tokens_per_sec: 61.0589 +2025/09/26 00:32:43 - mmengine - INFO - Iter(train) [4490/4603] base_lr: 1.1233e-05 lr: 1.1233e-05 eta: 0:02:08 time: 0.9944 data_time: 0.0091 memory: 16133 loss: 1.1562 tflops: 6.5087 tokens_per_sec: 107.5992 +2025/09/26 00:32:54 - mmengine - INFO - Iter(train) [4500/4603] base_lr: 1.1197e-05 lr: 1.1197e-05 eta: 0:01:57 time: 1.2273 data_time: 0.0089 memory: 16133 loss: 0.0036 tflops: 3.1526 tokens_per_sec: 52.1471 +2025/09/26 00:33:05 - mmengine - INFO - Iter(train) [4510/4603] base_lr: 1.1161e-05 lr: 1.1161e-05 eta: 0:01:45 time: 1.2931 data_time: 0.0094 memory: 16133 loss: 0.1387 tflops: 2.3373 tokens_per_sec: 38.6681 +2025/09/26 00:33:15 - mmengine - INFO - Iter(train) [4520/4603] base_lr: 1.1126e-05 lr: 1.1126e-05 eta: 0:01:34 time: 0.9910 data_time: 0.0094 memory: 16133 loss: 0.1396 tflops: 5.2479 tokens_per_sec: 86.7793 +2025/09/26 00:33:26 - mmengine - INFO - Iter(train) [4530/4603] base_lr: 1.1090e-05 lr: 1.1090e-05 eta: 0:01:23 time: 1.2193 data_time: 0.0096 memory: 16133 loss: 0.1436 tflops: 2.2803 tokens_per_sec: 37.7277 +2025/09/26 00:33:36 - mmengine - INFO - Iter(train) [4540/4603] base_lr: 1.1054e-05 lr: 1.1054e-05 eta: 0:01:11 time: 1.2980 data_time: 0.0099 memory: 16133 loss: 1.3594 tflops: 3.3072 tokens_per_sec: 54.6992 +2025/09/26 00:33:47 - mmengine - INFO - Iter(train) [4550/4603] base_lr: 1.1018e-05 lr: 1.1018e-05 eta: 0:01:00 time: 1.0038 data_time: 0.0088 memory: 16133 loss: 1.1953 tflops: 
3.9750 tokens_per_sec: 65.7489 +2025/09/26 00:33:58 - mmengine - INFO - Iter(train) [4560/4603] base_lr: 1.0983e-05 lr: 1.0983e-05 eta: 0:00:48 time: 0.9925 data_time: 0.0089 memory: 16133 loss: 1.6562 tflops: 4.8742 tokens_per_sec: 80.6071 +2025/09/26 00:34:08 - mmengine - INFO - Iter(train) [4570/4603] base_lr: 1.0947e-05 lr: 1.0947e-05 eta: 0:00:37 time: 1.3469 data_time: 0.0089 memory: 16133 loss: 0.1260 tflops: 2.4235 tokens_per_sec: 40.0924 +2025/09/26 00:34:19 - mmengine - INFO - Iter(train) [4580/4603] base_lr: 1.0911e-05 lr: 1.0911e-05 eta: 0:00:26 time: 0.9943 data_time: 0.0087 memory: 16133 loss: 0.0078 tflops: 4.0132 tokens_per_sec: 66.3796 +2025/09/26 00:34:29 - mmengine - INFO - Iter(train) [4590/4603] base_lr: 1.0875e-05 lr: 1.0875e-05 eta: 0:00:14 time: 0.9909 data_time: 0.0094 memory: 16133 loss: 0.0457 tflops: 3.7824 tokens_per_sec: 62.5665 +2025/09/26 00:34:40 - mmengine - INFO - Iter(train) [4600/4603] base_lr: 1.0840e-05 lr: 1.0840e-05 eta: 0:00:03 time: 0.9814 data_time: 0.0088 memory: 16133 loss: 1.5000 tflops: 6.2866 tokens_per_sec: 103.9336 +2025/09/26 00:34:43 - mmengine - INFO - Exp name: temp_config_stage_2b_20250925_230352 +2025/09/26 00:34:43 - mmengine - INFO - after_train_iter in EvaluateChatHook. +2025/09/26 00:34:44 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:34:47 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern, which is a characteristic feature of papillary thyroid carcinoma. 
This pattern is evident in the slide, reflecting the glandular architecture typical of this type of cancer.<|im_end|> + +2025/09/26 00:34:47 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:34:58 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and nuclear atypia. The tumor is characterized by glandular structures with irregular shapes and sizes, and there is marked nuclear pleomorphism. The presence of microvascular invasion and lymphovascular invasion is noted, suggesting aggressive tumor behavior. The tumor cells exhibit high mitotic activity, indicating a high proliferation rate. The absence of lymphovascular invasion and perineural invasion is a positive finding, but the presence of vascular invasion is concerning. The tumor is poorly differentiated, reflecting a high degree of malignancy. The absence of lymphovascular invasion and perineural invasion is a positive finding, but the presence of vascular invasion is concerning.<|im_end|> + +2025/09/26 00:34:58 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:34:59 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 00:34:59 - mmengine - INFO - Saving checkpoint at 4603 iterations +2025/09/26 00:35:10 - mmengine - INFO - after_train in EvaluateChatHook. +2025/09/26 00:35:10 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:35:14 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. This arrangement is typical of papillary thyroid carcinoma, where the cells form distinct lobules. The lobulated structure is evident in the slide, contributing to the overall architecture of the tumor.<|im_end|> + +2025/09/26 00:35:14 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:35:24 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and nuclear atypia. The tumor is characterized by glandular structures with irregular shapes and poorly defined borders, reflecting a high degree of architectural deviation. The nuclear features include enlarged, irregular nuclei with prominent nucleoli and marked pleomorphism, indicating aggressive cellular behavior. 
The presence of microvascular invasion and lymphovascular invasion suggests a high risk of metastasis. The tumor is infiltrative, extending into the muscularis propria and surrounding fat, and is associated with lymph node metastasis. The overall histological grade is high, reflecting a poorly differentiated and aggressive tumor.<|im_end|> + +2025/09/26 00:35:24 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 00:35:25 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/20250925_230352.json b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/20250925_230352.json new file mode 100644 index 0000000000000000000000000000000000000000..dd927faedb8264f20025a135c31b3210aba678e0 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/20250925_230352.json @@ -0,0 +1,460 @@ +{"base_lr": 5.882352941176473e-07, "lr": 5.882352941176473e-07, "data_time": 0.008524894714355469, "loss": 0.21875, "time": 0.9872832298278809, "tflops": 4.164193257175627, "tokens_per_sec": 68.87587872001633, "iter": 10, "memory": 15742, "step": 10} +{"base_lr": 1.0196078431372559e-06, "lr": 1.0196078431372559e-06, "data_time": 0.008862972259521484, "loss": 1.390625, "time": 1.0128672122955322, "tflops": 3.8797814928276297, "tokens_per_sec": 64.17425622122938, "iter": 20, "memory": 15742, "step": 20} +{"base_lr": 1.4509803921568641e-06, "lr": 1.4509803921568641e-06, 
"data_time": 0.008648395538330078, "loss": 0.69140625, "time": 1.0541915893554688, "tflops": 2.8095105908405205, "tokens_per_sec": 46.481114528633306, "iter": 30, "memory": 15742, "step": 30} +{"base_lr": 1.8823529411764717e-06, "lr": 1.8823529411764717e-06, "data_time": 0.009172916412353516, "loss": 0.318359375, "time": 1.074812650680542, "tflops": 2.3617301920128884, "tokens_per_sec": 39.076577646688165, "iter": 40, "memory": 15742, "step": 40} +{"base_lr": 2.31372549019608e-06, "lr": 2.31372549019608e-06, "data_time": 0.009110689163208008, "loss": 0.0673828125, "time": 1.0274059772491455, "tflops": 3.8837747838296752, "tokens_per_sec": 64.23945495883638, "iter": 50, "memory": 15743, "step": 50} +{"base_lr": 2.7450980392156876e-06, "lr": 2.7450980392156876e-06, "data_time": 0.0076406002044677734, "loss": 1.5703125, "time": 0.9788403511047363, "tflops": 5.251323950870456, "tokens_per_sec": 86.83744995185444, "iter": 60, "memory": 15742, "step": 60} +{"base_lr": 3.176470588235296e-06, "lr": 3.176470588235296e-06, "data_time": 0.008695602416992188, "loss": 2.046875, "time": 0.9778482913970947, "tflops": 5.318565160331918, "tokens_per_sec": 87.94820296412246, "iter": 70, "memory": 16133, "step": 70} +{"base_lr": 3.607843137254902e-06, "lr": 3.607843137254902e-06, "data_time": 0.008966684341430664, "loss": 0.08203125, "time": 0.9880132675170898, "tflops": 4.773679026825057, "tokens_per_sec": 78.94630827775993, "iter": 80, "memory": 16133, "step": 80} +{"base_lr": 4.0392156862745096e-06, "lr": 4.0392156862745096e-06, "data_time": 0.00854039192199707, "loss": 1.2265625, "time": 1.202319860458374, "tflops": 4.375951615090016, "tokens_per_sec": 72.36011219739783, "iter": 90, "memory": 16133, "step": 90} +{"base_lr": 4.470588235294117e-06, "lr": 4.470588235294117e-06, "data_time": 0.00881052017211914, "loss": 1.015625, "time": 1.040710210800171, "tflops": 4.4737996564554905, "tokens_per_sec": 73.98793554713278, "iter": 100, "memory": 16133, "step": 100} +{"base_lr": 
4.901960784313724e-06, "lr": 4.901960784313724e-06, "data_time": 0.00935053825378418, "loss": 1.984375, "time": 0.9840950965881348, "tflops": 4.8541945441949705, "tokens_per_sec": 80.27679466528522, "iter": 110, "memory": 16133, "step": 110} +{"base_lr": 5.333333333333329e-06, "lr": 5.333333333333329e-06, "data_time": 0.00858759880065918, "loss": 0.275390625, "time": 1.0231690406799316, "tflops": 3.308530578668718, "tokens_per_sec": 54.73191405667562, "iter": 120, "memory": 16133, "step": 120} +{"base_lr": 5.7647058823529375e-06, "lr": 5.7647058823529375e-06, "data_time": 0.008618593215942383, "loss": 1.578125, "time": 1.0206077098846436, "tflops": 4.621225342207601, "tokens_per_sec": 76.42505464586358, "iter": 130, "memory": 16133, "step": 130} +{"base_lr": 6.196078431372542e-06, "lr": 6.196078431372542e-06, "data_time": 0.00869297981262207, "loss": 0.0869140625, "time": 1.3174896240234375, "tflops": 3.2123739385149777, "tokens_per_sec": 53.13134822737822, "iter": 140, "memory": 16133, "step": 140} +{"base_lr": 6.62745098039215e-06, "lr": 6.62745098039215e-06, "data_time": 0.009142637252807617, "loss": 1.515625, "time": 0.9733965396881104, "tflops": 4.783178071538082, "tokens_per_sec": 79.10445215327431, "iter": 150, "memory": 16133, "step": 150} +{"base_lr": 7.058823529411758e-06, "lr": 7.058823529411758e-06, "data_time": 0.009024381637573242, "loss": 0.09521484375, "time": 1.3030588626861572, "tflops": 2.505028702464441, "tokens_per_sec": 41.44095216745745, "iter": 160, "memory": 16133, "step": 160} +{"base_lr": 7.4901960784313645e-06, "lr": 7.4901960784313645e-06, "data_time": 0.009188175201416016, "loss": 1.3359375, "time": 0.9896304607391357, "tflops": 4.46007706480148, "tokens_per_sec": 73.7649081106537, "iter": 170, "memory": 16133, "step": 170} +{"base_lr": 7.921568627450973e-06, "lr": 7.921568627450973e-06, "data_time": 0.008887767791748047, "loss": 0.10107421875, "time": 1.0033645629882812, "tflops": 4.519668631000321, "tokens_per_sec": 
74.74850395010533, "iter": 180, "memory": 16133, "step": 180} +{"base_lr": 8.35294117647058e-06, "lr": 8.35294117647058e-06, "data_time": 0.008906126022338867, "loss": 2.03125, "time": 1.0010943412780762, "tflops": 5.920900506742169, "tokens_per_sec": 97.89287178947346, "iter": 190, "memory": 16133, "step": 190} +{"base_lr": 8.784313725490186e-06, "lr": 8.784313725490186e-06, "data_time": 0.009262800216674805, "loss": 0.018798828125, "time": 0.9854555130004883, "tflops": 6.014863090594988, "tokens_per_sec": 99.44639682567994, "iter": 200, "memory": 16133, "step": 200} +{"base_lr": 9.215686274509796e-06, "lr": 9.215686274509796e-06, "data_time": 0.00849294662475586, "loss": 0.24609375, "time": 1.0007083415985107, "tflops": 2.8992261261044914, "tokens_per_sec": 47.966023670071365, "iter": 210, "memory": 16133, "step": 210} +{"base_lr": 9.647058823529404e-06, "lr": 9.647058823529404e-06, "data_time": 0.008557796478271484, "loss": 0.60546875, "time": 1.041980266571045, "tflops": 3.306858757630567, "tokens_per_sec": 54.70353117868656, "iter": 220, "memory": 16133, "step": 220} +{"base_lr": 1.0078431372549012e-05, "lr": 1.0078431372549012e-05, "data_time": 0.008387565612792969, "loss": 1.4921875, "time": 0.9833061695098877, "tflops": 4.365667028221028, "tokens_per_sec": 72.20538444838249, "iter": 230, "memory": 16133, "step": 230} +{"base_lr": 1.0509803921568623e-05, "lr": 1.0509803921568623e-05, "data_time": 0.009382009506225586, "loss": 0.2333984375, "time": 1.0031208992004395, "tflops": 2.8922533374448074, "tokens_per_sec": 47.85066290435345, "iter": 240, "memory": 16133, "step": 240} +{"base_lr": 1.094117647058823e-05, "lr": 1.094117647058823e-05, "data_time": 0.008673906326293945, "loss": 0.060546875, "time": 0.9933228492736816, "tflops": 4.199795616055937, "tokens_per_sec": 69.46382039875895, "iter": 250, "memory": 16133, "step": 250} +{"base_lr": 1.1372549019607831e-05, "lr": 1.1372549019607831e-05, "data_time": 0.008640050888061523, "loss": 1.6796875, "time": 
1.219836950302124, "tflops": 4.759864760480628, "tokens_per_sec": 78.69904250411862, "iter": 260, "memory": 16133, "step": 260} +{"base_lr": 1.1803921568627442e-05, "lr": 1.1803921568627442e-05, "data_time": 0.009214401245117188, "loss": 0.7421875, "time": 1.002384901046753, "tflops": 2.8340398032159366, "tokens_per_sec": 46.8881763391216, "iter": 270, "memory": 16133, "step": 270} +{"base_lr": 1.2235294117647047e-05, "lr": 1.2235294117647047e-05, "data_time": 0.008814096450805664, "loss": 0.1767578125, "time": 1.2698991298675537, "tflops": 2.284653718029013, "tokens_per_sec": 37.79827773011266, "iter": 280, "memory": 16133, "step": 280} +{"base_lr": 1.2666666666666648e-05, "lr": 1.2666666666666648e-05, "data_time": 0.008632898330688477, "loss": 1.9375, "time": 0.9910604953765869, "tflops": 3.049507796240577, "tokens_per_sec": 50.45100701037464, "iter": 290, "memory": 16133, "step": 290} +{"base_lr": 1.3098039215686255e-05, "lr": 1.3098039215686255e-05, "data_time": 0.00885772705078125, "loss": 0.05322265625, "time": 1.0014095306396484, "tflops": 4.891161294926447, "tokens_per_sec": 80.8859887204993, "iter": 300, "memory": 16133, "step": 300} +{"base_lr": 1.352941176470586e-05, "lr": 1.352941176470586e-05, "data_time": 0.008812665939331055, "loss": 1.8203125, "time": 1.002932071685791, "tflops": 5.3666503097259435, "tokens_per_sec": 88.7398085199474, "iter": 310, "memory": 16133, "step": 310} +{"base_lr": 1.3960784313725465e-05, "lr": 1.3960784313725465e-05, "data_time": 0.008949041366577148, "loss": 3.640625, "time": 1.2422642707824707, "tflops": 3.163339361343473, "tokens_per_sec": 52.32381026221242, "iter": 320, "memory": 16133, "step": 320} +{"base_lr": 1.4392156862745072e-05, "lr": 1.4392156862745072e-05, "data_time": 0.008502960205078125, "loss": 0.2158203125, "time": 0.9795055389404297, "tflops": 2.9619840350381974, "tokens_per_sec": 49.00431706784886, "iter": 330, "memory": 16133, "step": 330} +{"base_lr": 1.4823529411764678e-05, "lr": 
1.4823529411764678e-05, "data_time": 0.008882761001586914, "loss": 1.484375, "time": 0.9972810745239258, "tflops": 3.9404171655738987, "tokens_per_sec": 65.17721198205231, "iter": 340, "memory": 16133, "step": 340} +{"base_lr": 1.5254901960784286e-05, "lr": 1.5254901960784286e-05, "data_time": 0.0091705322265625, "loss": 0.7890625, "time": 0.9842259883880615, "tflops": 2.8863276739539225, "tokens_per_sec": 47.75326048535618, "iter": 350, "memory": 16133, "step": 350} +{"base_lr": 1.5686274509803884e-05, "lr": 1.5686274509803884e-05, "data_time": 0.00878143310546875, "loss": 1.7578125, "time": 1.0178942680358887, "tflops": 5.049844508657472, "tokens_per_sec": 83.50572615359258, "iter": 360, "memory": 16133, "step": 360} +{"base_lr": 1.6117647058823492e-05, "lr": 1.6117647058823492e-05, "data_time": 0.00905752182006836, "loss": 1.4765625, "time": 1.0026702880859375, "tflops": 5.971995990922202, "tokens_per_sec": 98.73634551282935, "iter": 370, "memory": 16133, "step": 370} +{"base_lr": 1.6549019607843093e-05, "lr": 1.6549019607843093e-05, "data_time": 0.00919795036315918, "loss": 0.0252685546875, "time": 0.9891290664672852, "tflops": 3.3000747452371644, "tokens_per_sec": 54.593482115340734, "iter": 380, "memory": 16133, "step": 380} +{"base_lr": 1.698039215686269e-05, "lr": 1.698039215686269e-05, "data_time": 0.009583234786987305, "loss": 0.029296875, "time": 1.0506329536437988, "tflops": 3.682727726109483, "tokens_per_sec": 60.91566020081006, "iter": 390, "memory": 16133, "step": 390} +{"base_lr": 1.7411764705882296e-05, "lr": 1.7411764705882296e-05, "data_time": 0.008723020553588867, "loss": 0.10595703125, "time": 1.0348429679870605, "tflops": 2.978937558954903, "tokens_per_sec": 49.282839597542115, "iter": 400, "memory": 16133, "step": 400} +{"base_lr": 1.7843137254901907e-05, "lr": 1.7843137254901907e-05, "data_time": 0.008978605270385742, "loss": 0.84375, "time": 0.9999411106109619, "tflops": 3.0829141358868366, "tokens_per_sec": 51.00300353566632, "iter": 410, 
"memory": 16133, "step": 410} +{"base_lr": 1.8274509803921515e-05, "lr": 1.8274509803921515e-05, "data_time": 0.008895158767700195, "loss": 0.03564453125, "time": 1.0184733867645264, "tflops": 4.214923412493927, "tokens_per_sec": 69.71217993774212, "iter": 420, "memory": 16133, "step": 420} +{"base_lr": 1.870588235294112e-05, "lr": 1.870588235294112e-05, "data_time": 0.009042739868164062, "loss": 1.4375, "time": 1.2031400203704834, "tflops": 3.668590559570265, "tokens_per_sec": 60.67456718583794, "iter": 430, "memory": 16133, "step": 430} +{"base_lr": 1.9137254901960735e-05, "lr": 1.9137254901960735e-05, "data_time": 0.008719682693481445, "loss": 0.0390625, "time": 0.9837861061096191, "tflops": 4.240508096017895, "tokens_per_sec": 70.13719707100792, "iter": 440, "memory": 16133, "step": 440} +{"base_lr": 1.9568627450980336e-05, "lr": 1.9568627450980336e-05, "data_time": 0.008626937866210938, "loss": 0.31640625, "time": 1.000011920928955, "tflops": 2.9617271285303546, "tokens_per_sec": 48.999415881395436, "iter": 450, "memory": 16133, "step": 450} +{"base_lr": 1.999999999999995e-05, "lr": 1.999999999999995e-05, "data_time": 0.009011507034301758, "loss": 0.03662109375, "time": 1.2423996925354004, "tflops": 3.65009374039839, "tokens_per_sec": 60.36704649120203, "iter": 460, "memory": 16133, "step": 460} +{"base_lr": 1.9999947744086436e-05, "lr": 1.9999947744086436e-05, "data_time": 0.008683919906616211, "loss": 1.40625, "time": 1.0127382278442383, "tflops": 5.1353347901021, "tokens_per_sec": 84.91829145521511, "iter": 470, "memory": 16133, "step": 470} +{"base_lr": 1.999976710706184e-05, "lr": 1.999976710706184e-05, "data_time": 0.009174585342407227, "loss": 3.4375, "time": 0.9958086013793945, "tflops": 4.250083125023432, "tokens_per_sec": 70.29463282699675, "iter": 480, "memory": 16133, "step": 480} +{"base_lr": 1.999945744612158e-05, "lr": 1.999945744612158e-05, "data_time": 0.009193897247314453, "loss": 0.0250244140625, "time": 0.9861748218536377, "tflops": 
4.352967879271975, "tokens_per_sec": 71.9953485189114, "iter": 490, "memory": 16133, "step": 490} +{"base_lr": 1.999901876526112e-05, "lr": 1.999901876526112e-05, "data_time": 0.008847951889038086, "loss": 0.03759765625, "time": 1.0194499492645264, "tflops": 3.8547291780152584, "tokens_per_sec": 63.759873691523495, "iter": 500, "memory": 16133, "step": 500} +{"base_lr": 1.999845107014062e-05, "lr": 1.999845107014062e-05, "data_time": 0.009894609451293945, "loss": 0.017822265625, "time": 0.988703727722168, "tflops": 4.035802956249438, "tokens_per_sec": 66.75407217487468, "iter": 510, "memory": 16133, "step": 510} +{"base_lr": 1.9997754368084873e-05, "lr": 1.9997754368084873e-05, "data_time": 0.009016036987304688, "loss": 1.4453125, "time": 0.9823880195617676, "tflops": 4.431351800126319, "tokens_per_sec": 73.29079606655333, "iter": 520, "memory": 16133, "step": 520} +{"base_lr": 1.999692866808322e-05, "lr": 1.999692866808322e-05, "data_time": 0.007185459136962891, "loss": 1.90625, "time": 0.9557437896728516, "tflops": 4.364928124864526, "tokens_per_sec": 72.19508067485985, "iter": 530, "memory": 16133, "step": 530} +{"base_lr": 1.99959739807894e-05, "lr": 1.99959739807894e-05, "data_time": 0.009478330612182617, "loss": 0.04736328125, "time": 0.9996399879455566, "tflops": 4.112718796915313, "tokens_per_sec": 68.02448963619837, "iter": 540, "memory": 16133, "step": 540} +{"base_lr": 1.999489031852146e-05, "lr": 1.999489031852146e-05, "data_time": 0.009806156158447266, "loss": 0.734375, "time": 1.0192034244537354, "tflops": 5.162162340699301, "tokens_per_sec": 85.36078069649757, "iter": 550, "memory": 16133, "step": 550} +{"base_lr": 1.9993677695261562e-05, "lr": 1.9993677695261562e-05, "data_time": 0.008526802062988281, "loss": 1.4609375, "time": 1.0077121257781982, "tflops": 5.581543768987468, "tokens_per_sec": 92.28826132075085, "iter": 560, "memory": 16133, "step": 560} +{"base_lr": 1.9992336126655814e-05, "lr": 1.9992336126655814e-05, "data_time": 
0.008981466293334961, "loss": 1.1953125, "time": 1.056100606918335, "tflops": 4.236671107427695, "tokens_per_sec": 70.06908197492601, "iter": 570, "memory": 16133, "step": 570} +{"base_lr": 1.9990865630014053e-05, "lr": 1.9990865630014053e-05, "data_time": 0.008991003036499023, "loss": 0.035400390625, "time": 1.3262734413146973, "tflops": 3.0998420389822074, "tokens_per_sec": 51.271478325421526, "iter": 580, "memory": 16133, "step": 580} +{"base_lr": 1.9989266224309644e-05, "lr": 1.9989266224309644e-05, "data_time": 0.009540319442749023, "loss": 0.12060546875, "time": 1.029627799987793, "tflops": 3.7578580419003007, "tokens_per_sec": 62.15838383607805, "iter": 590, "memory": 16133, "step": 590} +{"base_lr": 1.9987537930179225e-05, "lr": 1.9987537930179225e-05, "data_time": 0.009036779403686523, "loss": 1.03125, "time": 0.9971165657043457, "tflops": 4.608690214778731, "tokens_per_sec": 76.21977471233637, "iter": 600, "memory": 16133, "step": 600} +{"base_lr": 1.9985680769922452e-05, "lr": 1.9985680769922452e-05, "data_time": 0.00832056999206543, "loss": 1.5546875, "time": 0.9819443225860596, "tflops": 3.940340627621436, "tokens_per_sec": 65.17681148294, "iter": 610, "memory": 16133, "step": 610} +{"base_lr": 1.9983694767501683e-05, "lr": 1.9983694767501683e-05, "data_time": 0.009741067886352539, "loss": 0.10693359375, "time": 1.0000543594360352, "tflops": 3.2035350150160613, "tokens_per_sec": 52.997119106440884, "iter": 620, "memory": 16133, "step": 620} +{"base_lr": 1.9981579948541705e-05, "lr": 1.9981579948541705e-05, "data_time": 0.00917816162109375, "loss": 1.0234375, "time": 0.9978580474853516, "tflops": 2.846896625094218, "tokens_per_sec": 47.10088786515785, "iter": 630, "memory": 16133, "step": 630} +{"base_lr": 1.9979336340329377e-05, "lr": 1.9979336340329377e-05, "data_time": 0.008788347244262695, "loss": 1.21875, "time": 1.2734053134918213, "tflops": 4.226765552272326, "tokens_per_sec": 69.89133707623854, "iter": 640, "memory": 16133, "step": 640} 
+{"base_lr": 1.9976963971813275e-05, "lr": 1.9976963971813275e-05, "data_time": 0.008724689483642578, "loss": 0.0198974609375, "time": 0.9978935718536377, "tflops": 3.7560951565864933, "tokens_per_sec": 62.130874222157516, "iter": 650, "memory": 16133, "step": 650} +{"base_lr": 1.9974462873603337e-05, "lr": 1.9974462873603337e-05, "data_time": 0.009305715560913086, "loss": 0.255859375, "time": 0.9893627166748047, "tflops": 3.360438084316289, "tokens_per_sec": 55.59134084291803, "iter": 660, "memory": 16133, "step": 660} +{"base_lr": 1.9971833077970475e-05, "lr": 1.9971833077970475e-05, "data_time": 0.008910894393920898, "loss": 0.01513671875, "time": 1.0174455642700195, "tflops": 3.862323059785078, "tokens_per_sec": 63.885481722622934, "iter": 670, "memory": 16133, "step": 670} +{"base_lr": 1.9969074618846124e-05, "lr": 1.9969074618846124e-05, "data_time": 0.008722543716430664, "loss": 1.7421875, "time": 0.9885158538818359, "tflops": 5.07743849252507, "tokens_per_sec": 83.96425780525477, "iter": 680, "memory": 16133, "step": 680} +{"base_lr": 1.9966187531821836e-05, "lr": 1.9966187531821836e-05, "data_time": 0.008638620376586914, "loss": 0.002197265625, "time": 0.9926965236663818, "tflops": 4.446301578925365, "tokens_per_sec": 73.53707629629996, "iter": 690, "memory": 16133, "step": 690} +{"base_lr": 1.9963171854148803e-05, "lr": 1.9963171854148803e-05, "data_time": 0.008794307708740234, "loss": 0.265625, "time": 1.0040645599365234, "tflops": 2.5281417043839784, "tokens_per_sec": 41.82997954097034, "iter": 700, "memory": 16133, "step": 700} +{"base_lr": 1.9960027624737383e-05, "lr": 1.9960027624737383e-05, "data_time": 0.009371280670166016, "loss": 0.0810546875, "time": 1.011732578277588, "tflops": 3.3459296763970214, "tokens_per_sec": 55.35059481358323, "iter": 710, "memory": 16133, "step": 710} +{"base_lr": 1.995675488415661e-05, "lr": 1.995675488415661e-05, "data_time": 0.009412765502929688, "loss": 0.1962890625, "time": 1.0025129318237305, "tflops": 
2.894007325466831, "tokens_per_sec": 47.87968162428836, "iter": 720, "memory": 16133, "step": 720} +{"base_lr": 1.9953353674633637e-05, "lr": 1.9953353674633637e-05, "data_time": 0.008684158325195312, "loss": 0.197265625, "time": 1.0114426612854004, "tflops": 4.423731664809659, "tokens_per_sec": 73.16282260220595, "iter": 730, "memory": 16133, "step": 730} +{"base_lr": 1.9949824040053233e-05, "lr": 1.9949824040053233e-05, "data_time": 0.009159564971923828, "loss": 0.06787109375, "time": 1.0097177028656006, "tflops": 3.2926947233678874, "tokens_per_sec": 54.47067021193581, "iter": 740, "memory": 16133, "step": 740} +{"base_lr": 1.9946166025957175e-05, "lr": 1.9946166025957175e-05, "data_time": 0.00884699821472168, "loss": 0.05859375, "time": 1.0112392902374268, "tflops": 3.8261914323361914, "tokens_per_sec": 63.288680154931754, "iter": 750, "memory": 16133, "step": 750} +{"base_lr": 1.9942379679543698e-05, "lr": 1.9942379679543698e-05, "data_time": 0.008803844451904297, "loss": 1.328125, "time": 0.9931695461273193, "tflops": 4.992693551994335, "tokens_per_sec": 82.56394924679401, "iter": 760, "memory": 16133, "step": 760} +{"base_lr": 1.9938465049666864e-05, "lr": 1.9938465049666864e-05, "data_time": 0.008560895919799805, "loss": 0.1357421875, "time": 0.9753861427307129, "tflops": 2.9744935277125606, "tokens_per_sec": 49.21127940732161, "iter": 770, "memory": 16133, "step": 770} +{"base_lr": 1.9934422186835943e-05, "lr": 1.9934422186835943e-05, "data_time": 0.009826421737670898, "loss": 0.043701171875, "time": 0.990581750869751, "tflops": 4.028151562142633, "tokens_per_sec": 66.62751453071292, "iter": 780, "memory": 16133, "step": 780} +{"base_lr": 1.993025114321474e-05, "lr": 1.993025114321474e-05, "data_time": 0.009480953216552734, "loss": 0.046630859375, "time": 1.2005980014801025, "tflops": 3.7771804843966157, "tokens_per_sec": 62.46886960287889, "iter": 790, "memory": 16133, "step": 790} +{"base_lr": 1.992595197262094e-05, "lr": 1.992595197262094e-05, 
"data_time": 0.00901937484741211, "loss": 0.4609375, "time": 0.9970369338989258, "tflops": 3.213230170766735, "tokens_per_sec": 53.15750921351495, "iter": 800, "memory": 16133, "step": 800} +{"base_lr": 1.9921524730525433e-05, "lr": 1.9921524730525433e-05, "data_time": 0.00908207893371582, "loss": 0.01483154296875, "time": 1.0055177211761475, "tflops": 5.894853832746864, "tokens_per_sec": 97.46223058632181, "iter": 810, "memory": 16133, "step": 810} +{"base_lr": 1.9916969474051568e-05, "lr": 1.9916969474051568e-05, "data_time": 0.008567333221435547, "loss": 0.0169677734375, "time": 1.2569775581359863, "tflops": 2.9819014570055837, "tokens_per_sec": 49.324667412433776, "iter": 820, "memory": 16133, "step": 820} +{"base_lr": 1.9912286261974386e-05, "lr": 1.9912286261974386e-05, "data_time": 0.008791923522949219, "loss": 0.00848388671875, "time": 0.9821064472198486, "tflops": 4.864023713875271, "tokens_per_sec": 80.43934567739895, "iter": 830, "memory": 16133, "step": 830} +{"base_lr": 1.9907475154719963e-05, "lr": 1.9907475154719963e-05, "data_time": 0.009096384048461914, "loss": 0.01068115234375, "time": 0.9947891235351562, "tflops": 4.436948511144007, "tokens_per_sec": 73.38238655093897, "iter": 840, "memory": 16133, "step": 840} +{"base_lr": 1.9902536214364513e-05, "lr": 1.9902536214364513e-05, "data_time": 0.00857853889465332, "loss": 1.2734375, "time": 0.9982521533966064, "tflops": 4.724716292695462, "tokens_per_sec": 78.13657073969006, "iter": 850, "memory": 16133, "step": 850} +{"base_lr": 1.9897469504633664e-05, "lr": 1.9897469504633664e-05, "data_time": 0.008818387985229492, "loss": 1.5859375, "time": 1.0080606937408447, "tflops": 6.180392537923417, "tokens_per_sec": 102.17638743325247, "iter": 860, "memory": 16133, "step": 860} +{"base_lr": 1.9892275090901615e-05, "lr": 1.9892275090901615e-05, "data_time": 0.0088653564453125, "loss": 0.53125, "time": 0.9986014366149902, "tflops": 2.9053430750187075, "tokens_per_sec": 48.06722506094119, "iter": 870, 
"memory": 16133, "step": 870} +{"base_lr": 1.9886953040190286e-05, "lr": 1.9886953040190286e-05, "data_time": 0.009137153625488281, "loss": 1.9765625, "time": 1.2483093738555908, "tflops": 2.6148965315764743, "tokens_per_sec": 43.25850717051786, "iter": 880, "memory": 16133, "step": 880} +{"base_lr": 1.988150342116846e-05, "lr": 1.988150342116846e-05, "data_time": 0.008737325668334961, "loss": 0.0223388671875, "time": 1.3624787330627441, "tflops": 2.662190352200015, "tokens_per_sec": 44.03738461669836, "iter": 890, "memory": 16133, "step": 890} +{"base_lr": 1.9875926304150895e-05, "lr": 1.9875926304150895e-05, "data_time": 0.008765220642089844, "loss": 1.609375, "time": 0.9669051170349121, "tflops": 5.316144975628551, "tokens_per_sec": 87.90934963770907, "iter": 900, "memory": 16133, "step": 900} +{"base_lr": 1.9870221761097415e-05, "lr": 1.9870221761097415e-05, "data_time": 0.009374618530273438, "loss": 1.390625, "time": 0.9999887943267822, "tflops": 4.534926157696055, "tokens_per_sec": 75.00084043483398, "iter": 910, "memory": 16133, "step": 910} +{"base_lr": 1.986438986561198e-05, "lr": 1.986438986561198e-05, "data_time": 0.009124517440795898, "loss": 1.3828125, "time": 1.0255787372589111, "tflops": 4.303743788964113, "tokens_per_sec": 71.17932280366661, "iter": 920, "memory": 16133, "step": 920} +{"base_lr": 1.985843069294174e-05, "lr": 1.985843069294174e-05, "data_time": 0.009217500686645508, "loss": 2.1875, "time": 1.2584664821624756, "tflops": 3.7477821461464464, "tokens_per_sec": 61.9801966166849, "iter": 930, "memory": 16133, "step": 930} +{"base_lr": 1.9852344319976067e-05, "lr": 1.9852344319976067e-05, "data_time": 0.0091094970703125, "loss": 1.140625, "time": 0.9903407096862793, "tflops": 2.685303282899226, "tokens_per_sec": 44.429154097779055, "iter": 940, "memory": 16133, "step": 940} +{"base_lr": 1.984613082524555e-05, "lr": 1.984613082524555e-05, "data_time": 0.0087890625, "loss": 1.640625, "time": 0.9890744686126709, "tflops": 4.9521605218516305, 
"tokens_per_sec": 81.89474359148413, "iter": 950, "memory": 16133, "step": 950} +{"base_lr": 1.9839790288921006e-05, "lr": 1.9839790288921006e-05, "data_time": 0.009489297866821289, "loss": 0.0247802734375, "time": 1.383854627609253, "tflops": 3.9331690641229264, "tokens_per_sec": 65.03573294791732, "iter": 960, "memory": 16133, "step": 960} +{"base_lr": 1.983332279281242e-05, "lr": 1.983332279281242e-05, "data_time": 0.009531259536743164, "loss": 0.98828125, "time": 1.0031132698059082, "tflops": 5.063902017336285, "tokens_per_sec": 83.73929697507577, "iter": 970, "memory": 16133, "step": 970} +{"base_lr": 1.982672842036791e-05, "lr": 1.982672842036791e-05, "data_time": 0.00927734375, "loss": 0.08740234375, "time": 0.9879059791564941, "tflops": 3.9165620919226813, "tokens_per_sec": 64.78349291354677, "iter": 980, "memory": 16133, "step": 980} +{"base_lr": 1.9820007256672626e-05, "lr": 1.9820007256672626e-05, "data_time": 0.009133100509643555, "loss": 1.15625, "time": 0.9826757907867432, "tflops": 5.169219958619294, "tokens_per_sec": 85.48088880124239, "iter": 990, "memory": 16133, "step": 990} +{"base_lr": 1.9813159388447673e-05, "lr": 1.9813159388447673e-05, "data_time": 0.008687496185302734, "loss": 1.3359375, "time": 1.0025537014007568, "tflops": 5.066728399176701, "tokens_per_sec": 83.78603548373752, "iter": 1000, "memory": 16133, "step": 1000} +{"base_lr": 1.980618490404899e-05, "lr": 1.980618490404899e-05, "data_time": 0.00909733772277832, "loss": 1.6640625, "time": 1.3062586784362793, "tflops": 3.5179872373040473, "tokens_per_sec": 58.18143163720169, "iter": 1010, "memory": 16133, "step": 1010} +{"base_lr": 1.979908389346619e-05, "lr": 1.979908389346619e-05, "data_time": 0.009068012237548828, "loss": 0.06396484375, "time": 0.9959368705749512, "tflops": 3.8849802860641574, "tokens_per_sec": 64.26110117098963, "iter": 1020, "memory": 16133, "step": 1020} +{"base_lr": 1.979185644832144e-05, "lr": 1.979185644832144e-05, "data_time": 0.00920867919921875, "loss": 
1.3828125, "time": 0.9904766082763672, "tflops": 5.495267798425173, "tokens_per_sec": 90.86534628670094, "iter": 1030, "memory": 16133, "step": 1030} +{"base_lr": 1.9784502661868234e-05, "lr": 1.9784502661868234e-05, "data_time": 0.00901484489440918, "loss": 0.0157470703125, "time": 0.9842207431793213, "tflops": 2.9477937634204587, "tokens_per_sec": 48.76954721041255, "iter": 1040, "memory": 16133, "step": 1040} +{"base_lr": 1.9777022628990215e-05, "lr": 1.9777022628990215e-05, "data_time": 0.008942842483520508, "loss": 0.09228515625, "time": 1.0177338123321533, "tflops": 4.753228625498831, "tokens_per_sec": 78.60601566985389, "iter": 1050, "memory": 16133, "step": 1050} +{"base_lr": 1.976941644619995e-05, "lr": 1.976941644619995e-05, "data_time": 0.008702278137207031, "loss": 0.515625, "time": 1.3479816913604736, "tflops": 2.152313927680784, "tokens_per_sec": 35.60879224666588, "iter": 1060, "memory": 16133, "step": 1060} +{"base_lr": 1.9761684211637654e-05, "lr": 1.9761684211637654e-05, "data_time": 0.009236335754394531, "loss": 1.40625, "time": 1.0147202014923096, "tflops": 3.9919625026788315, "tokens_per_sec": 66.02805374466742, "iter": 1070, "memory": 16133, "step": 1070} +{"base_lr": 1.9753826025069985e-05, "lr": 1.9753826025069985e-05, "data_time": 0.009155988693237305, "loss": 0.51953125, "time": 1.016932487487793, "tflops": 3.8047708731447525, "tokens_per_sec": 62.93436465781638, "iter": 1080, "memory": 16133, "step": 1080} +{"base_lr": 1.9745841987888698e-05, "lr": 1.9745841987888698e-05, "data_time": 0.009380340576171875, "loss": 0.01055908203125, "time": 1.2443771362304688, "tflops": 3.2552229361425025, "tokens_per_sec": 53.84219787491917, "iter": 1090, "memory": 16133, "step": 1090} +{"base_lr": 1.973773220310936e-05, "lr": 1.973773220310936e-05, "data_time": 0.00909113883972168, "loss": 1.21875, "time": 0.991276741027832, "tflops": 4.574782351993165, "tokens_per_sec": 75.66000178936768, "iter": 1100, "memory": 16133, "step": 1100} +{"base_lr": 
1.972949677537004e-05, "lr": 1.972949677537004e-05, "data_time": 0.009260416030883789, "loss": 0.1640625, "time": 0.9921650886535645, "tflops": 4.204696371013364, "tokens_per_sec": 69.5448779532932, "iter": 1110, "memory": 16133, "step": 1110} +{"base_lr": 1.9721135810929925e-05, "lr": 1.9721135810929925e-05, "data_time": 0.008704423904418945, "loss": 0.6640625, "time": 0.9859991073608398, "tflops": 2.8197989549088756, "tokens_per_sec": 46.65318625194153, "iter": 1120, "memory": 16133, "step": 1120} +{"base_lr": 1.9712649417667957e-05, "lr": 1.9712649417667957e-05, "data_time": 0.009140968322753906, "loss": 1.171875, "time": 0.9946060180664062, "tflops": 4.80289578190372, "tokens_per_sec": 79.42843554627078, "iter": 1130, "memory": 16133, "step": 1130} +{"base_lr": 1.9704037705081455e-05, "lr": 1.9704037705081455e-05, "data_time": 0.009226560592651367, "loss": 1.3203125, "time": 1.2457458972930908, "tflops": 3.8346416064529625, "tokens_per_sec": 63.415821935755474, "iter": 1140, "memory": 16133, "step": 1140} +{"base_lr": 1.9695300784284703e-05, "lr": 1.9695300784284703e-05, "data_time": 0.008873462677001953, "loss": 1.15625, "time": 1.3674635887145996, "tflops": 3.6703927537041428, "tokens_per_sec": 60.69631446491264, "iter": 1150, "memory": 16133, "step": 1150} +{"base_lr": 1.96864387680075e-05, "lr": 1.96864387680075e-05, "data_time": 0.008931636810302734, "loss": 1.7734375, "time": 1.319000005722046, "tflops": 3.3922296500819664, "tokens_per_sec": 56.103108172038915, "iter": 1160, "memory": 16133, "step": 1160} +{"base_lr": 1.9677451770593687e-05, "lr": 1.9677451770593687e-05, "data_time": 0.009285926818847656, "loss": 1.4609375, "time": 1.008923053741455, "tflops": 4.074877814840242, "tokens_per_sec": 67.39859868179616, "iter": 1170, "memory": 16133, "step": 1170} +{"base_lr": 1.9668339907999766e-05, "lr": 1.9668339907999766e-05, "data_time": 0.009534120559692383, "loss": 0.0654296875, "time": 1.3686044216156006, "tflops": 2.8271098991348373, "tokens_per_sec": 
46.76296451271359, "iter": 1180, "memory": 16133, "step": 1180} +{"base_lr": 1.9659103297793265e-05, "lr": 1.9659103297793265e-05, "data_time": 0.009553194046020508, "loss": 0.0966796875, "time": 0.9937186241149902, "tflops": 4.076329955751324, "tokens_per_sec": 67.42351242496139, "iter": 1190, "memory": 16133, "step": 1190} +{"base_lr": 1.9649742059151334e-05, "lr": 1.9649742059151334e-05, "data_time": 0.010024785995483398, "loss": 0.1435546875, "time": 1.3382084369659424, "tflops": 3.1174164147158594, "tokens_per_sec": 51.56147435177506, "iter": 1200, "memory": 16133, "step": 1200} +{"base_lr": 1.9640256312859143e-05, "lr": 1.9640256312859143e-05, "data_time": 0.009113311767578125, "loss": 0.06494140625, "time": 1.2687287330627441, "tflops": 3.860601095406822, "tokens_per_sec": 63.84343468315725, "iter": 1210, "memory": 16133, "step": 1210} +{"base_lr": 1.963064618130835e-05, "lr": 1.963064618130835e-05, "data_time": 0.009511232376098633, "loss": 0.1416015625, "time": 1.0045092105865479, "tflops": 3.430214012449507, "tokens_per_sec": 56.744128773752216, "iter": 1220, "memory": 16133, "step": 1220} +{"base_lr": 1.9620911788495544e-05, "lr": 1.9620911788495544e-05, "data_time": 0.008709430694580078, "loss": 0.26953125, "time": 1.3086321353912354, "tflops": 2.3094700378071824, "tokens_per_sec": 38.20783446144973, "iter": 1230, "memory": 16133, "step": 1230} +{"base_lr": 1.9611053260020586e-05, "lr": 1.9611053260020586e-05, "data_time": 0.00928044319152832, "loss": 2.203125, "time": 1.002645492553711, "tflops": 3.919334893679352, "tokens_per_sec": 64.82849669466117, "iter": 1240, "memory": 16133, "step": 1240} +{"base_lr": 1.9601070723085036e-05, "lr": 1.9601070723085036e-05, "data_time": 0.008513212203979492, "loss": 0.1796875, "time": 0.9911537170410156, "tflops": 3.110246707402825, "tokens_per_sec": 51.45518714514197, "iter": 1250, "memory": 16133, "step": 1250} +{"base_lr": 1.9590964306490494e-05, "lr": 1.9590964306490494e-05, "data_time": 0.008801460266113281, 
"loss": 1.734375, "time": 1.024923324584961, "tflops": 4.7789482580257125, "tokens_per_sec": 79.03030212793881, "iter": 1260, "memory": 16133, "step": 1260} +{"base_lr": 1.958073414063693e-05, "lr": 1.958073414063693e-05, "data_time": 0.008933067321777344, "loss": 0.2138671875, "time": 1.0364642143249512, "tflops": 2.5658050921145192, "tokens_per_sec": 42.45202042852462, "iter": 1270, "memory": 16133, "step": 1270} +{"base_lr": 1.9570380357521033e-05, "lr": 1.9570380357521033e-05, "data_time": 0.009038209915161133, "loss": 0.10986328125, "time": 1.3490376472473145, "tflops": 3.3615632225312355, "tokens_per_sec": 55.59518680073938, "iter": 1280, "memory": 16133, "step": 1280} +{"base_lr": 1.9559903090734485e-05, "lr": 1.9559903090734485e-05, "data_time": 0.007570505142211914, "loss": 1.0703125, "time": 1.0035035610198975, "tflops": 3.9159835775392584, "tokens_per_sec": 64.7730636191, "iter": 1290, "memory": 16133, "step": 1290} +{"base_lr": 1.9549302475462224e-05, "lr": 1.9549302475462224e-05, "data_time": 0.009075641632080078, "loss": 1.4140625, "time": 0.9850924015045166, "tflops": 5.095083912235305, "tokens_per_sec": 84.25605544530758, "iter": 1300, "memory": 16133, "step": 1300} +{"base_lr": 1.953857864848072e-05, "lr": 1.953857864848072e-05, "data_time": 0.00930476188659668, "loss": 0.12890625, "time": 0.9715664386749268, "tflops": 4.667591592583451, "tokens_per_sec": 77.19492668171179, "iter": 1310, "memory": 16133, "step": 1310} +{"base_lr": 1.9527731748156227e-05, "lr": 1.9527731748156227e-05, "data_time": 0.009043693542480469, "loss": 0.2041015625, "time": 1.0203123092651367, "tflops": 2.784244276887392, "tokens_per_sec": 46.064327140976, "iter": 1320, "memory": 16133, "step": 1320} +{"base_lr": 1.9516761914442945e-05, "lr": 1.9516761914442945e-05, "data_time": 0.008829116821289062, "loss": 0.068359375, "time": 1.2515339851379395, "tflops": 3.671815079675875, "tokens_per_sec": 60.725478414845306, "iter": 1330, "memory": 16133, "step": 1330} +{"base_lr": 
1.950566928888126e-05, "lr": 1.950566928888126e-05, "data_time": 0.009313821792602539, "loss": 0.1904296875, "time": 1.008615493774414, "tflops": 2.8165328861630132, "tokens_per_sec": 46.598530649247955, "iter": 1340, "memory": 16133, "step": 1340} +{"base_lr": 1.949445401459592e-05, "lr": 1.949445401459592e-05, "data_time": 0.00870370864868164, "loss": 0.052490234375, "time": 1.0140647888183594, "tflops": 4.710733575902663, "tokens_per_sec": 77.9042925767859, "iter": 1350, "memory": 16133, "step": 1350} +{"base_lr": 1.9483116236294146e-05, "lr": 1.9483116236294146e-05, "data_time": 0.008784294128417969, "loss": 0.2060546875, "time": 1.03938627243042, "tflops": 2.6167763371720487, "tokens_per_sec": 43.29478000005927, "iter": 1360, "memory": 16133, "step": 1360} +{"base_lr": 1.9471656100263793e-05, "lr": 1.9471656100263793e-05, "data_time": 0.009187459945678711, "loss": 1.3515625, "time": 1.016573429107666, "tflops": 4.877750142790672, "tokens_per_sec": 80.66313524631249, "iter": 1370, "memory": 16133, "step": 1370} +{"base_lr": 1.946007375437147e-05, "lr": 1.946007375437147e-05, "data_time": 0.009354114532470703, "loss": 0.054931640625, "time": 1.2237787246704102, "tflops": 3.3100142316656447, "tokens_per_sec": 54.74845954524155, "iter": 1380, "memory": 16133, "step": 1380} +{"base_lr": 1.9448369348060627e-05, "lr": 1.9448369348060627e-05, "data_time": 0.009099960327148438, "loss": 1.75, "time": 1.001380443572998, "tflops": 3.984712756081639, "tokens_per_sec": 65.90901632194982, "iter": 1390, "memory": 16133, "step": 1390} +{"base_lr": 1.943654303234961e-05, "lr": 1.943654303234961e-05, "data_time": 0.008838415145874023, "loss": 1.5078125, "time": 1.3178200721740723, "tflops": 5.2333243236833855, "tokens_per_sec": 86.50649842648254, "iter": 1400, "memory": 16133, "step": 1400} +{"base_lr": 1.9424594959829723e-05, "lr": 1.9424594959829723e-05, "data_time": 0.008842706680297852, "loss": 0.1943359375, "time": 1.0276379585266113, "tflops": 4.530709424380318, 
"tokens_per_sec": 74.92911230169501, "iter": 1410, "memory": 16133, "step": 1410} +{"base_lr": 1.9412525284663273e-05, "lr": 1.9412525284663273e-05, "data_time": 0.008708000183105469, "loss": 0.05859375, "time": 0.9853134155273438, "tflops": 5.9542664878526, "tokens_per_sec": 98.44583304286662, "iter": 1420, "memory": 16133, "step": 1420} +{"base_lr": 1.9400334162581558e-05, "lr": 1.9400334162581558e-05, "data_time": 0.009298086166381836, "loss": 1.3203125, "time": 1.1812093257904053, "tflops": 4.249143938660664, "tokens_per_sec": 70.26696978064437, "iter": 1430, "memory": 16133, "step": 1430} +{"base_lr": 1.9388021750882875e-05, "lr": 1.9388021750882875e-05, "data_time": 0.008642435073852539, "loss": 0.2216796875, "time": 1.021488904953003, "tflops": 2.8402459923993426, "tokens_per_sec": 46.99023138402214, "iter": 1440, "memory": 16133, "step": 1440} +{"base_lr": 1.937558820843048e-05, "lr": 1.937558820843048e-05, "data_time": 0.008488893508911133, "loss": 0.1396484375, "time": 1.00783109664917, "tflops": 4.439584115576362, "tokens_per_sec": 73.42500171503069, "iter": 1450, "memory": 16133, "step": 1450} +{"base_lr": 1.936303369565055e-05, "lr": 1.936303369565055e-05, "data_time": 0.00916147232055664, "loss": 0.2060546875, "time": 0.9994895458221436, "tflops": 2.7212304663377314, "tokens_per_sec": 45.02298216930286, "iter": 1460, "memory": 16133, "step": 1460} +{"base_lr": 1.9350358374530084e-05, "lr": 1.9350358374530084e-05, "data_time": 0.008870363235473633, "loss": 1.5234375, "time": 0.9840962886810303, "tflops": 4.485158791206604, "tokens_per_sec": 74.17973306023401, "iter": 1470, "memory": 16133, "step": 1470} +{"base_lr": 1.9337562408614862e-05, "lr": 1.9337562408614862e-05, "data_time": 0.00956869125366211, "loss": 1.390625, "time": 1.017620325088501, "tflops": 4.872732065916515, "tokens_per_sec": 80.58015153420604, "iter": 1480, "memory": 16133, "step": 1480} +{"base_lr": 1.9324645963007276e-05, "lr": 1.9324645963007276e-05, "data_time": 
0.008959770202636719, "loss": 1.3203125, "time": 1.3244366645812988, "tflops": 3.698218017986349, "tokens_per_sec": 61.15807736683716, "iter": 1490, "memory": 16133, "step": 1490} +{"base_lr": 1.931160920436425e-05, "lr": 1.931160920436425e-05, "data_time": 0.009025335311889648, "loss": 1.796875, "time": 0.9959440231323242, "tflops": 4.188742389946086, "tokens_per_sec": 69.28100214198803, "iter": 1500, "memory": 16133, "step": 1500} +{"base_lr": 1.9298452300895065e-05, "lr": 1.9298452300895065e-05, "data_time": 0.009225845336914062, "loss": 0.0712890625, "time": 1.2883358001708984, "tflops": 4.083790526142332, "tokens_per_sec": 67.52897807263592, "iter": 1510, "memory": 16133, "step": 1510} +{"base_lr": 1.9285175422359204e-05, "lr": 1.9285175422359204e-05, "data_time": 0.008836746215820312, "loss": 0.158203125, "time": 1.0168769359588623, "tflops": 3.5669780776532134, "tokens_per_sec": 59.00419006294414, "iter": 1520, "memory": 16133, "step": 1520} +{"base_lr": 1.927177874006414e-05, "lr": 1.927177874006414e-05, "data_time": 0.010029315948486328, "loss": 2.203125, "time": 1.3005573749542236, "tflops": 3.53340917352317, "tokens_per_sec": 58.4364838211129, "iter": 1530, "memory": 16133, "step": 1530} +{"base_lr": 1.9258262426863132e-05, "lr": 1.9258262426863132e-05, "data_time": 0.008970260620117188, "loss": 0.06689453125, "time": 0.979363203048706, "tflops": 4.939456041288984, "tokens_per_sec": 81.68573186217588, "iter": 1540, "memory": 16133, "step": 1540} +{"base_lr": 1.924462665715302e-05, "lr": 1.924462665715302e-05, "data_time": 0.008864402770996094, "loss": 0.353515625, "time": 0.9957895278930664, "tflops": 2.6706096865028566, "tokens_per_sec": 44.186044106180624, "iter": 1550, "memory": 16133, "step": 1550} +{"base_lr": 1.9230871606871927e-05, "lr": 1.9230871606871927e-05, "data_time": 0.00916910171508789, "loss": 1.7265625, "time": 0.9844110012054443, "tflops": 4.668173510584612, "tokens_per_sec": 77.20352566850457, "iter": 1560, "memory": 16133, "step": 
1560} +{"base_lr": 1.921699745349705e-05, "lr": 1.921699745349705e-05, "data_time": 0.008910417556762695, "loss": 1.4375, "time": 1.3103103637695312, "tflops": 4.06151013418069, "tokens_per_sec": 67.15966112545458, "iter": 1570, "memory": 16133, "step": 1570} +{"base_lr": 1.92030043760423e-05, "lr": 1.92030043760423e-05, "data_time": 0.009324312210083008, "loss": 0.04638671875, "time": 0.9729022979736328, "tflops": 4.287946442717492, "tokens_per_sec": 70.92181829937367, "iter": 1580, "memory": 16133, "step": 1580} +{"base_lr": 1.9188892555056045e-05, "lr": 1.9188892555056045e-05, "data_time": 0.008805513381958008, "loss": 0.1181640625, "time": 0.9898886680603027, "tflops": 4.397774274391839, "tokens_per_sec": 72.73545230193616, "iter": 1590, "memory": 16133, "step": 1590} +{"base_lr": 1.9174662172618773e-05, "lr": 1.9174662172618773e-05, "data_time": 0.00888967514038086, "loss": 2.03125, "time": 1.251549243927002, "tflops": 4.107075933910037, "tokens_per_sec": 67.91582545583783, "iter": 1600, "memory": 16133, "step": 1600} +{"base_lr": 1.916031341234073e-05, "lr": 1.916031341234073e-05, "data_time": 0.009091615676879883, "loss": 0.12255859375, "time": 1.0005128383636475, "tflops": 3.504381505695518, "tokens_per_sec": 57.97027062121644, "iter": 1610, "memory": 16133, "step": 1610} +{"base_lr": 1.914584645935954e-05, "lr": 1.914584645935954e-05, "data_time": 0.009047508239746094, "loss": 0.07373046875, "time": 0.9965634346008301, "tflops": 4.489780351673946, "tokens_per_sec": 74.25518279181715, "iter": 1620, "memory": 16133, "step": 1620} +{"base_lr": 1.9131261500337853e-05, "lr": 1.9131261500337853e-05, "data_time": 0.009209394454956055, "loss": 0.119140625, "time": 1.0046005249023438, "tflops": 3.851476295738815, "tokens_per_sec": 63.70691475216745, "iter": 1630, "memory": 16133, "step": 1630} +{"base_lr": 1.9116558723460897e-05, "lr": 1.9116558723460897e-05, "data_time": 0.008789300918579102, "loss": 1.546875, "time": 1.3550353050231934, "tflops": 
2.721463904775646, "tokens_per_sec": 45.01727724275854, "iter": 1640, "memory": 16133, "step": 1640} +{"base_lr": 1.910173831843408e-05, "lr": 1.910173831843408e-05, "data_time": 0.00905752182006836, "loss": 0.2392578125, "time": 0.9816780090332031, "tflops": 3.7565063476852476, "tokens_per_sec": 62.13850105495708, "iter": 1650, "memory": 16133, "step": 1650} +{"base_lr": 1.9086800476480517e-05, "lr": 1.9086800476480517e-05, "data_time": 0.00870060920715332, "loss": 0.20703125, "time": 0.9887776374816895, "tflops": 4.157899625483769, "tokens_per_sec": 68.77178186707371, "iter": 1660, "memory": 16133, "step": 1660} +{"base_lr": 1.90717453903386e-05, "lr": 1.90717453903386e-05, "data_time": 0.00900411605834961, "loss": 0.203125, "time": 0.9985446929931641, "tflops": 2.90550817497856, "tokens_per_sec": 48.06995654452948, "iter": 1670, "memory": 16133, "step": 1670} +{"base_lr": 1.9056573254259453e-05, "lr": 1.9056573254259453e-05, "data_time": 0.008977413177490234, "loss": 0.162109375, "time": 1.0100758075714111, "tflops": 3.7107939660888247, "tokens_per_sec": 61.38153150010504, "iter": 1680, "memory": 16133, "step": 1680} +{"base_lr": 1.9041284264004476e-05, "lr": 1.9041284264004476e-05, "data_time": 0.010865926742553711, "loss": 1.59375, "time": 1.0095634460449219, "tflops": 6.35118927904244, "tokens_per_sec": 104.99587758962737, "iter": 1690, "memory": 16133, "step": 1690} +{"base_lr": 1.9025878616842803e-05, "lr": 1.9025878616842803e-05, "data_time": 0.00914311408996582, "loss": 1.7578125, "time": 1.3444130420684814, "tflops": 3.4181469649284333, "tokens_per_sec": 56.53024600461455, "iter": 1700, "memory": 16133, "step": 1700} +{"base_lr": 1.901035651154876e-05, "lr": 1.901035651154876e-05, "data_time": 0.008980989456176758, "loss": 0.2578125, "time": 1.265146017074585, "tflops": 1.9108278465343103, "tokens_per_sec": 31.61690386731877, "iter": 1710, "memory": 16133, "step": 1710} +{"base_lr": 1.8994718148399294e-05, "lr": 1.8994718148399294e-05, "data_time": 
0.009119272232055664, "loss": 0.1396484375, "time": 0.9852135181427002, "tflops": 4.234364298586006, "tokens_per_sec": 70.03557983045852, "iter": 1720, "memory": 16133, "step": 1720} +{"base_lr": 1.897896372917139e-05, "lr": 1.897896372917139e-05, "data_time": 0.009035587310791016, "loss": 0.1455078125, "time": 0.9875144958496094, "tflops": 3.8568429864873357, "tokens_per_sec": 63.79653186329591, "iter": 1730, "memory": 16132, "step": 1730} +{"base_lr": 1.8963093457139462e-05, "lr": 1.8963093457139462e-05, "data_time": 0.008828163146972656, "loss": 1.734375, "time": 0.9862356185913086, "tflops": 5.273334035677221, "tokens_per_sec": 87.20025760451752, "iter": 1740, "memory": 16133, "step": 1740} +{"base_lr": 1.8947107537072736e-05, "lr": 1.8947107537072736e-05, "data_time": 0.008733272552490234, "loss": 0.1943359375, "time": 1.2328109741210938, "tflops": 2.353385741591124, "tokens_per_sec": 38.93540940790346, "iter": 1750, "memory": 16133, "step": 1750} +{"base_lr": 1.8931006175232633e-05, "lr": 1.8931006175232633e-05, "data_time": 0.009323358535766602, "loss": 0.1748046875, "time": 1.0296733379364014, "tflops": 4.93328041373527, "tokens_per_sec": 81.5792707309148, "iter": 1760, "memory": 16133, "step": 1760} +{"base_lr": 1.8914789579370055e-05, "lr": 1.8914789579370055e-05, "data_time": 0.008675575256347656, "loss": 0.10791015625, "time": 1.3282709121704102, "tflops": 3.277423964509896, "tokens_per_sec": 54.2058094777495, "iter": 1770, "memory": 16133, "step": 1770} +{"base_lr": 1.889845795872275e-05, "lr": 1.889845795872275e-05, "data_time": 0.00889444351196289, "loss": 0.212890625, "time": 1.234731674194336, "tflops": 2.2027793242224045, "tokens_per_sec": 36.44516532656871, "iter": 1780, "memory": 16133, "step": 1780} +{"base_lr": 1.8882011524012582e-05, "lr": 1.8882011524012582e-05, "data_time": 0.008870124816894531, "loss": 1.21875, "time": 0.9832320213317871, "tflops": 5.474176589424018, "tokens_per_sec": 90.517800548602, "iter": 1790, "memory": 16133, "step": 
1790} +{"base_lr": 1.8865450487442844e-05, "lr": 1.8865450487442844e-05, "data_time": 0.009136199951171875, "loss": 1.4296875, "time": 1.0184617042541504, "tflops": 4.630962748693011, "tokens_per_sec": 76.58609025171458, "iter": 1800, "memory": 16133, "step": 1800} +{"base_lr": 1.8848775062695478e-05, "lr": 1.8848775062695478e-05, "data_time": 0.008932352066040039, "loss": 1.6015625, "time": 0.9700255393981934, "tflops": 5.174222990133545, "tokens_per_sec": 85.56475745103359, "iter": 1810, "memory": 16133, "step": 1810} +{"base_lr": 1.8831985464928373e-05, "lr": 1.8831985464928373e-05, "data_time": 0.009530067443847656, "loss": 0.74609375, "time": 1.0016024112701416, "tflops": 4.829782192497583, "tokens_per_sec": 79.87201218742211, "iter": 1820, "memory": 16133, "step": 1820} +{"base_lr": 1.8815081910772526e-05, "lr": 1.8815081910772526e-05, "data_time": 0.009420394897460938, "loss": 0.53515625, "time": 1.0017173290252686, "tflops": 3.922966440822098, "tokens_per_sec": 64.88856498388026, "iter": 1830, "memory": 16133, "step": 1830} +{"base_lr": 1.8798064618329295e-05, "lr": 1.8798064618329295e-05, "data_time": 0.00950932502746582, "loss": 1.6484375, "time": 1.2904047966003418, "tflops": 3.232902542442132, "tokens_per_sec": 53.47159293094048, "iter": 1840, "memory": 16133, "step": 1840} +{"base_lr": 1.8780933807167557e-05, "lr": 1.8780933807167557e-05, "data_time": 0.009421348571777344, "loss": 1.71875, "time": 1.01251220703125, "tflops": 5.256073738582088, "tokens_per_sec": 86.91253240090276, "iter": 1850, "memory": 16133, "step": 1850} +{"base_lr": 1.8763689698320902e-05, "lr": 1.8763689698320902e-05, "data_time": 0.008850574493408203, "loss": 1.3515625, "time": 0.9921727180480957, "tflops": 7.012053798294158, "tokens_per_sec": 115.90723863696225, "iter": 1860, "memory": 16133, "step": 1860} +{"base_lr": 1.8746332514284753e-05, "lr": 1.8746332514284753e-05, "data_time": 0.008228778839111328, "loss": 0.1806640625, "time": 1.0279145240783691, "tflops": 
3.1755557252734747, "tokens_per_sec": 52.5335509276552, "iter": 1870, "memory": 16133, "step": 1870} +{"base_lr": 1.8728862479013512e-05, "lr": 1.8728862479013512e-05, "data_time": 0.008748769760131836, "loss": 0.171875, "time": 0.9852142333984375, "tflops": 3.0062115778134584, "tokens_per_sec": 49.73537565624453, "iter": 1880, "memory": 16133, "step": 1880} +{"base_lr": 1.8711279817917668e-05, "lr": 1.8711279817917668e-05, "data_time": 0.008911371231079102, "loss": 0.125, "time": 1.0064077377319336, "tflops": 4.145191646786643, "tokens_per_sec": 68.56068113648611, "iter": 1890, "memory": 16133, "step": 1890} +{"base_lr": 1.8693584757860872e-05, "lr": 1.8693584757860872e-05, "data_time": 0.009386539459228516, "loss": 0.1328125, "time": 1.3082497119903564, "tflops": 3.4663683081551953, "tokens_per_sec": 57.328504881410225, "iter": 1900, "memory": 16133, "step": 1900} +{"base_lr": 1.8675777527157045e-05, "lr": 1.8675777527157045e-05, "data_time": 0.009516239166259766, "loss": 1.1796875, "time": 0.9849696159362793, "tflops": 5.341579527024749, "tokens_per_sec": 88.32759771702437, "iter": 1910, "memory": 16133, "step": 1910} +{"base_lr": 1.8657858355567394e-05, "lr": 1.8657858355567394e-05, "data_time": 0.009712934494018555, "loss": 0.1904296875, "time": 1.2618374824523926, "tflops": 2.5868623316573314, "tokens_per_sec": 42.794734465335196, "iter": 1920, "memory": 16133, "step": 1920} +{"base_lr": 1.8639827474297472e-05, "lr": 1.8639827474297472e-05, "data_time": 0.008854866027832031, "loss": 0.19921875, "time": 0.9910397529602051, "tflops": 3.9041775032645565, "tokens_per_sec": 64.5786405729633, "iter": 1930, "memory": 16133, "step": 1930} +{"base_lr": 1.8621685115994187e-05, "lr": 1.8621685115994187e-05, "data_time": 0.008912801742553711, "loss": 1.1640625, "time": 0.9983706474304199, "tflops": 3.633097334714218, "tokens_per_sec": 60.09792070146125, "iter": 1940, "memory": 16133, "step": 1940} +{"base_lr": 1.8603431514742813e-05, "lr": 1.8603431514742813e-05, 
"data_time": 0.008697986602783203, "loss": 1.3671875, "time": 1.0254595279693604, "tflops": 4.894516370538878, "tokens_per_sec": 80.93932304113225, "iter": 1950, "memory": 16133, "step": 1950} +{"base_lr": 1.8585066906063937e-05, "lr": 1.8585066906063937e-05, "data_time": 0.009708881378173828, "loss": 1.4296875, "time": 1.0016229152679443, "tflops": 5.010996025014384, "tokens_per_sec": 82.86551628834668, "iter": 1960, "memory": 16133, "step": 1960} +{"base_lr": 1.8566591526910446e-05, "lr": 1.8566591526910446e-05, "data_time": 0.00975942611694336, "loss": 0.08544921875, "time": 1.020911455154419, "tflops": 4.086302417947608, "tokens_per_sec": 67.58666449628166, "iter": 1970, "memory": 16133, "step": 1970} +{"base_lr": 1.8548005615664474e-05, "lr": 1.8548005615664474e-05, "data_time": 0.010078907012939453, "loss": 1.859375, "time": 0.9981284141540527, "tflops": 3.8764502177081055, "tokens_per_sec": 64.1200060957868, "iter": 1980, "memory": 16133, "step": 1980} +{"base_lr": 1.8529309412134306e-05, "lr": 1.8529309412134306e-05, "data_time": 0.009151935577392578, "loss": 1.3125, "time": 1.306913137435913, "tflops": 3.284676846465031, "tokens_per_sec": 54.3264873281812, "iter": 1990, "memory": 16133, "step": 1990} +{"base_lr": 1.8510503157551303e-05, "lr": 1.8510503157551303e-05, "data_time": 0.009372949600219727, "loss": 0.140625, "time": 1.0017821788787842, "tflops": 8.094232302334712, "tokens_per_sec": 133.76161287860188, "iter": 2000, "memory": 16133, "step": 2000} +{"base_lr": 1.8491587094566775e-05, "lr": 1.8491587094566775e-05, "data_time": 0.008806943893432617, "loss": 0.1162109375, "time": 1.0125634670257568, "tflops": 4.0602276325927935, "tokens_per_sec": 67.15628423734461, "iter": 2010, "memory": 16133, "step": 2010} +{"base_lr": 1.847256146724887e-05, "lr": 1.847256146724887e-05, "data_time": 0.009635686874389648, "loss": 1.8046875, "time": 0.9832067489624023, "tflops": 4.797015702291075, "tokens_per_sec": 79.33224632788132, "iter": 2020, "memory": 16133, 
"step": 2020} +{"base_lr": 1.845342652107938e-05, "lr": 1.845342652107938e-05, "data_time": 0.008751630783081055, "loss": 1.59375, "time": 1.2931995391845703, "tflops": 3.03874486951553, "tokens_per_sec": 50.262931612963314, "iter": 2030, "memory": 16133, "step": 2030} +{"base_lr": 1.843418250295062e-05, "lr": 1.843418250295062e-05, "data_time": 0.009129762649536133, "loss": 1.671875, "time": 1.360168218612671, "tflops": 4.446881945649959, "tokens_per_sec": 73.52031802501851, "iter": 2040, "memory": 16133, "step": 2040} +{"base_lr": 1.841482966116225e-05, "lr": 1.841482966116225e-05, "data_time": 0.00910639762878418, "loss": 0.365234375, "time": 1.2489264011383057, "tflops": 3.7764100503228324, "tokens_per_sec": 62.45364012550796, "iter": 2050, "memory": 16133, "step": 2050} +{"base_lr": 1.839536824541802e-05, "lr": 1.839536824541802e-05, "data_time": 0.009278059005737305, "loss": 0.1044921875, "time": 0.9893813133239746, "tflops": 4.705899455392991, "tokens_per_sec": 77.82641430858357, "iter": 2060, "memory": 16133, "step": 2060} +{"base_lr": 1.8375798506822576e-05, "lr": 1.8375798506822576e-05, "data_time": 0.009011030197143555, "loss": 0.09521484375, "time": 0.9995815753936768, "tflops": 4.173499242485354, "tokens_per_sec": 69.02888338328556, "iter": 2070, "memory": 16133, "step": 2070} +{"base_lr": 1.8356120697878238e-05, "lr": 1.8356120697878238e-05, "data_time": 0.008310794830322266, "loss": 0.359375, "time": 0.9764566421508789, "tflops": 2.909293239535504, "tokens_per_sec": 48.133217565526664, "iter": 2080, "memory": 16133, "step": 2080} +{"base_lr": 1.833633507248171e-05, "lr": 1.833633507248171e-05, "data_time": 0.009128093719482422, "loss": 1.3828125, "time": 1.0164463520050049, "tflops": 4.282869342115836, "tokens_per_sec": 70.83502228908057, "iter": 2090, "memory": 16133, "step": 2090} +{"base_lr": 1.8316441885920828e-05, "lr": 1.8316441885920828e-05, "data_time": 0.009154558181762695, "loss": 1.0390625, "time": 1.35164213180542, "tflops": 
2.9521227056360155, "tokens_per_sec": 48.82949298997762, "iter": 2100, "memory": 16133, "step": 2100} +{"base_lr": 1.829644139487125e-05, "lr": 1.829644139487125e-05, "data_time": 0.009849309921264648, "loss": 0.2109375, "time": 1.004249095916748, "tflops": 2.889004112993123, "tokens_per_sec": 47.796906360303446, "iter": 2110, "memory": 16133, "step": 2110} +{"base_lr": 1.8276333857393156e-05, "lr": 1.8276333857393156e-05, "data_time": 0.00888824462890625, "loss": 1.5, "time": 0.9867494106292725, "tflops": 5.147879751301228, "tokens_per_sec": 85.12799612045998, "iter": 2120, "memory": 16133, "step": 2120} +{"base_lr": 1.8256119532927907e-05, "lr": 1.8256119532927907e-05, "data_time": 0.008811712265014648, "loss": 1.03125, "time": 1.301722764968872, "tflops": 3.99528224801413, "tokens_per_sec": 66.06629484734442, "iter": 2130, "memory": 16133, "step": 2130} +{"base_lr": 1.823579868229471e-05, "lr": 1.823579868229471e-05, "data_time": 0.009162425994873047, "loss": 0.020263671875, "time": 1.042445182800293, "tflops": 4.698621680497179, "tokens_per_sec": 77.7019274839317, "iter": 2140, "memory": 16133, "step": 2140} +{"base_lr": 1.821537156768724e-05, "lr": 1.821537156768724e-05, "data_time": 0.008759021759033203, "loss": 1.2734375, "time": 1.0112552642822266, "tflops": 4.484402208787808, "tokens_per_sec": 74.16525050493524, "iter": 2150, "memory": 16133, "step": 2150} +{"base_lr": 1.8194838452670265e-05, "lr": 1.8194838452670265e-05, "data_time": 0.010076045989990234, "loss": 1.5, "time": 1.3205020427703857, "tflops": 3.388371076262416, "tokens_per_sec": 56.039292332099315, "iter": 2160, "memory": 16133, "step": 2160} +{"base_lr": 1.8174199602176248e-05, "lr": 1.8174199602176248e-05, "data_time": 0.007833003997802734, "loss": 0.00531005859375, "time": 0.980004072189331, "tflops": 3.8246608543724996, "tokens_per_sec": 63.26504323745167, "iter": 2170, "memory": 16133, "step": 2170} +{"base_lr": 1.8153455282501915e-05, "lr": 1.8153455282501915e-05, "data_time": 
0.009055852890014648, "loss": 0.498046875, "time": 1.0288655757904053, "tflops": 2.8198822439406914, "tokens_per_sec": 46.65332491378022, "iter": 2180, "memory": 16133, "step": 2180} +{"base_lr": 1.8132605761304844e-05, "lr": 1.8132605761304844e-05, "data_time": 0.009076833724975586, "loss": 0.005126953125, "time": 1.2224373817443848, "tflops": 3.4126516498993262, "tokens_per_sec": 56.444608967604076, "iter": 2190, "memory": 16133, "step": 2190} +{"base_lr": 1.8111651307599985e-05, "lr": 1.8111651307599985e-05, "data_time": 0.008786678314208984, "loss": 0.4765625, "time": 1.0142147541046143, "tflops": 3.218450371401406, "tokens_per_sec": 53.24316155075058, "iter": 2200, "memory": 16133, "step": 2200} +{"base_lr": 1.809059219175621e-05, "lr": 1.809059219175621e-05, "data_time": 0.009404897689819336, "loss": 1.5, "time": 1.0102522373199463, "tflops": 5.327764210080213, "tokens_per_sec": 88.09681059060664, "iter": 2210, "memory": 16133, "step": 2210} +{"base_lr": 1.8069428685492783e-05, "lr": 1.8069428685492783e-05, "data_time": 0.009118318557739258, "loss": 1.15625, "time": 1.0003910064697266, "tflops": 4.412102959769219, "tokens_per_sec": 72.97146768395716, "iter": 2220, "memory": 16133, "step": 2220} +{"base_lr": 1.8048161061875924e-05, "lr": 1.8048161061875924e-05, "data_time": 0.009263992309570312, "loss": 0.00168609619140625, "time": 1.0046331882476807, "tflops": 5.056240795157376, "tokens_per_sec": 83.61260705156712, "iter": 2230, "memory": 16133, "step": 2230} +{"base_lr": 1.8026789595315218e-05, "lr": 1.8026789595315218e-05, "data_time": 0.009464025497436523, "loss": 0.056396484375, "time": 1.2655162811279297, "tflops": 3.3921233467265477, "tokens_per_sec": 56.10358480466407, "iter": 2240, "memory": 16133, "step": 2240} +{"base_lr": 1.800531456156013e-05, "lr": 1.800531456156013e-05, "data_time": 0.009188413619995117, "loss": 0.859375, "time": 1.3102881908416748, "tflops": 3.045294504740829, "tokens_per_sec": 50.37059820981364, "iter": 2250, "memory": 16133, 
"step": 2250} +{"base_lr": 1.798373623769641e-05, "lr": 1.798373623769641e-05, "data_time": 0.009014368057250977, "loss": 0.263671875, "time": 1.0018773078918457, "tflops": 3.1373303333547287, "tokens_per_sec": 51.902562908992024, "iter": 2260, "memory": 16132, "step": 2260} +{"base_lr": 1.7962054902142516e-05, "lr": 1.7962054902142516e-05, "data_time": 0.009068012237548828, "loss": 0.006866455078125, "time": 1.011911392211914, "tflops": 3.9432439024863037, "tokens_per_sec": 65.22310205013788, "iter": 2270, "memory": 16133, "step": 2270} +{"base_lr": 1.794027083464605e-05, "lr": 1.794027083464605e-05, "data_time": 0.009247779846191406, "loss": 0.1474609375, "time": 1.0081837177276611, "tflops": 5.038434187407818, "tokens_per_sec": 83.31814779675646, "iter": 2280, "memory": 16133, "step": 2280} +{"base_lr": 1.7918384316280128e-05, "lr": 1.7918384316280128e-05, "data_time": 0.008588075637817383, "loss": 0.263671875, "time": 0.9813225269317627, "tflops": 2.771608037350554, "tokens_per_sec": 45.856483230495805, "iter": 2290, "memory": 16133, "step": 2290} +{"base_lr": 1.789639562943975e-05, "lr": 1.789639562943975e-05, "data_time": 0.008626937866210938, "loss": 0.17578125, "time": 1.0150651931762695, "tflops": 2.917805137029028, "tokens_per_sec": 48.27276152246382, "iter": 2300, "memory": 16133, "step": 2300} +{"base_lr": 1.7874305057838177e-05, "lr": 1.7874305057838177e-05, "data_time": 0.009023189544677734, "loss": 0.1240234375, "time": 1.0141160488128662, "tflops": 3.1591149367206377, "tokens_per_sec": 52.26226333957542, "iter": 2310, "memory": 16133, "step": 2310} +{"base_lr": 1.785211288650325e-05, "lr": 1.785211288650325e-05, "data_time": 0.008720159530639648, "loss": 0.2890625, "time": 1.0096406936645508, "tflops": 2.8735764978358382, "tokens_per_sec": 47.54166536783854, "iter": 2320, "memory": 16133, "step": 2320} +{"base_lr": 1.7829819401773726e-05, "lr": 1.7829819401773726e-05, "data_time": 0.00902104377746582, "loss": 0.2373046875, "time": 
0.9972794055938721, "tflops": 2.969842171066717, "tokens_per_sec": 49.13367279531031, "iter": 2330, "memory": 16133, "step": 2330} +{"base_lr": 1.7807424891295573e-05, "lr": 1.7807424891295573e-05, "data_time": 0.008963823318481445, "loss": 1.4296875, "time": 0.9896371364593506, "tflops": 4.765846025388077, "tokens_per_sec": 78.81676740525697, "iter": 2340, "memory": 16133, "step": 2340} +{"base_lr": 1.7784929644018248e-05, "lr": 1.7784929644018248e-05, "data_time": 0.008906364440917969, "loss": 0.51953125, "time": 1.0156605243682861, "tflops": 2.9160948604468224, "tokens_per_sec": 48.24446635890319, "iter": 2350, "memory": 16133, "step": 2350} +{"base_lr": 1.776233395019101e-05, "lr": 1.776233395019101e-05, "data_time": 0.009368419647216797, "loss": 0.07666015625, "time": 0.9980580806732178, "tflops": 2.7857289132931142, "tokens_per_sec": 46.08950209483364, "iter": 2360, "memory": 16133, "step": 2360} +{"base_lr": 1.7739638101359147e-05, "lr": 1.7739638101359147e-05, "data_time": 0.009354591369628906, "loss": 0.275390625, "time": 0.9994604587554932, "tflops": 3.689670401676651, "tokens_per_sec": 61.032929782829896, "iter": 2370, "memory": 16133, "step": 2370} +{"base_lr": 1.7716842390360197e-05, "lr": 1.7716842390360197e-05, "data_time": 0.009092569351196289, "loss": 0.004730224609375, "time": 1.0271399021148682, "tflops": 3.766960177850757, "tokens_per_sec": 62.308941428682196, "iter": 2380, "memory": 16133, "step": 2380} +{"base_lr": 1.7693947111320203e-05, "lr": 1.7693947111320203e-05, "data_time": 0.00919651985168457, "loss": 0.004180908203125, "time": 1.0134422779083252, "tflops": 4.116418900855501, "tokens_per_sec": 68.08478539334587, "iter": 2390, "memory": 16133, "step": 2390} +{"base_lr": 1.7670952559649897e-05, "lr": 1.7670952559649897e-05, "data_time": 0.009264945983886719, "loss": 0.03564453125, "time": 1.0119457244873047, "tflops": 4.481342458453572, "tokens_per_sec": 74.11464684820335, "iter": 2400, "memory": 16133, "step": 2400} +{"base_lr": 
1.7647859032040907e-05, "lr": 1.7647859032040907e-05, "data_time": 0.008965492248535156, "loss": 0.1865234375, "time": 0.9959022998809814, "tflops": 3.9458724670338508, "tokens_per_sec": 65.26744642290993, "iter": 2410, "memory": 16133, "step": 2410} +{"base_lr": 1.7624666826461906e-05, "lr": 1.7624666826461906e-05, "data_time": 0.008770227432250977, "loss": 1.5078125, "time": 1.0010662078857422, "tflops": 4.590506924672769, "tokens_per_sec": 75.9190545053324, "iter": 2420, "memory": 16133, "step": 2420} +{"base_lr": 1.760137624215477e-05, "lr": 1.760137624215477e-05, "data_time": 0.008911848068237305, "loss": 0.236328125, "time": 0.9800698757171631, "tflops": 4.997659511829058, "tokens_per_sec": 82.64716833648811, "iter": 2430, "memory": 16133, "step": 2430} +{"base_lr": 1.7577987579630746e-05, "lr": 1.7577987579630746e-05, "data_time": 0.008982419967651367, "loss": 0.66015625, "time": 1.0140538215637207, "tflops": 7.51809155099508, "tokens_per_sec": 124.25375983059514, "iter": 2440, "memory": 16133, "step": 2440} +{"base_lr": 1.755450114066654e-05, "lr": 1.755450114066654e-05, "data_time": 0.008801698684692383, "loss": 0.93359375, "time": 1.2994318008422852, "tflops": 3.024170612424416, "tokens_per_sec": 50.02186336967996, "iter": 2450, "memory": 16133, "step": 2450} +{"base_lr": 1.7530917228300436e-05, "lr": 1.7530917228300436e-05, "data_time": 0.008737325668334961, "loss": 1.59375, "time": 0.9831006526947021, "tflops": 5.721275152399855, "tokens_per_sec": 94.59865553439539, "iter": 2460, "memory": 16133, "step": 2460} +{"base_lr": 1.7507236146828404e-05, "lr": 1.7507236146828404e-05, "data_time": 0.009652376174926758, "loss": 0.0024261474609375, "time": 1.0070018768310547, "tflops": 4.924113155172383, "tokens_per_sec": 81.42983830175696, "iter": 2470, "memory": 16133, "step": 2470} +{"base_lr": 1.748345820180014e-05, "lr": 1.748345820180014e-05, "data_time": 0.009114742279052734, "loss": 1.0078125, "time": 0.970496416091919, "tflops": 4.423290237548896, 
"tokens_per_sec": 73.15843605670997, "iter": 2480, "memory": 16133, "step": 2480} +{"base_lr": 1.7459583700015158e-05, "lr": 1.7459583700015158e-05, "data_time": 0.00874638557434082, "loss": 0.31640625, "time": 1.2873003482818604, "tflops": 2.253770670105107, "tokens_per_sec": 37.28733551888459, "iter": 2490, "memory": 16133, "step": 2490} +{"base_lr": 1.7435612949518786e-05, "lr": 1.7435612949518786e-05, "data_time": 0.009273290634155273, "loss": 0.1259765625, "time": 0.997673511505127, "tflops": 3.2718116842695726, "tokens_per_sec": 54.1259233378658, "iter": 2500, "memory": 16133, "step": 2500} +{"base_lr": 1.741154625959824e-05, "lr": 1.741154625959824e-05, "data_time": 0.009348154067993164, "loss": 1.5703125, "time": 1.011430025100708, "tflops": 5.501157652218687, "tokens_per_sec": 90.96032124491123, "iter": 2510, "memory": 16133, "step": 2510} +{"base_lr": 1.738738394077862e-05, "lr": 1.738738394077862e-05, "data_time": 0.008783817291259766, "loss": 1.140625, "time": 0.9921519756317139, "tflops": 4.936799660673375, "tokens_per_sec": 81.64071834694961, "iter": 2520, "memory": 16133, "step": 2520} +{"base_lr": 1.7363126304818878e-05, "lr": 1.7363126304818878e-05, "data_time": 0.0087890625, "loss": 0.002044677734375, "time": 1.0279045104980469, "tflops": 3.823023855642127, "tokens_per_sec": 63.23544583770971, "iter": 2530, "memory": 16133, "step": 2530} +{"base_lr": 1.733877366470781e-05, "lr": 1.733877366470781e-05, "data_time": 0.009189128875732422, "loss": 0.051025390625, "time": 1.303208351135254, "tflops": 2.4119090900633493, "tokens_per_sec": 39.90152453724052, "iter": 2540, "memory": 16133, "step": 2540} +{"base_lr": 1.731432633466005e-05, "lr": 1.731432633466005e-05, "data_time": 0.008774757385253906, "loss": 0.271484375, "time": 1.0078623294830322, "tflops": 3.238735843716952, "tokens_per_sec": 53.578746243690745, "iter": 2550, "memory": 16133, "step": 2550} +{"base_lr": 1.7289784630111958e-05, "lr": 1.7289784630111958e-05, "data_time": 
0.008786678314208984, "loss": 0.083984375, "time": 1.3403632640838623, "tflops": 1.9840630000611743, "tokens_per_sec": 32.826921759931366, "iter": 2560, "memory": 16133, "step": 2560} +{"base_lr": 1.726514886771759e-05, "lr": 1.726514886771759e-05, "data_time": 0.008836746215820312, "loss": 0.8828125, "time": 1.413316011428833, "tflops": 2.780484642627356, "tokens_per_sec": 45.991129707955665, "iter": 2570, "memory": 16133, "step": 2570} +{"base_lr": 1.724041936534461e-05, "lr": 1.724041936534461e-05, "data_time": 0.008799076080322266, "loss": 1.390625, "time": 0.9931337833404541, "tflops": 7.188300452892283, "tokens_per_sec": 118.8158151291384, "iter": 2580, "memory": 16133, "step": 2580} +{"base_lr": 1.7215596442070162e-05, "lr": 1.7215596442070162e-05, "data_time": 0.008746862411499023, "loss": 0.07373046875, "time": 0.9789056777954102, "tflops": 5.127285049847124, "tokens_per_sec": 84.78855714356382, "iter": 2590, "memory": 16133, "step": 2590} +{"base_lr": 1.719068041817679e-05, "lr": 1.719068041817679e-05, "data_time": 0.009588241577148438, "loss": 1.328125, "time": 0.9937088489532471, "tflops": 5.477393319268516, "tokens_per_sec": 90.56978821786042, "iter": 2600, "memory": 16133, "step": 2600} +{"base_lr": 1.7165671615148274e-05, "lr": 1.7165671615148274e-05, "data_time": 0.008587837219238281, "loss": 0.1923828125, "time": 0.9900681972503662, "tflops": 3.3580435787656167, "tokens_per_sec": 55.55172881291547, "iter": 2610, "memory": 16133, "step": 2610} +{"base_lr": 1.7140570355665507e-05, "lr": 1.7140570355665507e-05, "data_time": 0.009155511856079102, "loss": 1.4765625, "time": 0.9990236759185791, "tflops": 5.266435282816593, "tokens_per_sec": 87.08502320519925, "iter": 2620, "memory": 16133, "step": 2620} +{"base_lr": 1.7115376963602302e-05, "lr": 1.7115376963602302e-05, "data_time": 0.008867502212524414, "loss": 1.0078125, "time": 0.9888744354248047, "tflops": 6.912902751331066, "tokens_per_sec": 114.2713330953315, "iter": 2630, "memory": 16133, "step": 
2630} +{"base_lr": 1.709009176402123e-05, "lr": 1.709009176402123e-05, "data_time": 0.008832931518554688, "loss": 0.94140625, "time": 0.9761953353881836, "tflops": 4.521459958437849, "tokens_per_sec": 74.78011557071906, "iter": 2640, "memory": 16133, "step": 2640} +{"base_lr": 1.706471508316945e-05, "lr": 1.706471508316945e-05, "data_time": 0.008678436279296875, "loss": 1.5078125, "time": 0.974231481552124, "tflops": 4.592697949710033, "tokens_per_sec": 75.95730727365624, "iter": 2650, "memory": 16133, "step": 2650} +{"base_lr": 1.7039247248474455e-05, "lr": 1.7039247248474455e-05, "data_time": 0.009337663650512695, "loss": 0.2734375, "time": 0.9974789619445801, "tflops": 2.90861249135226, "tokens_per_sec": 48.121315668027854, "iter": 2660, "memory": 16133, "step": 2660} +{"base_lr": 1.701368858853986e-05, "lr": 1.701368858853986e-05, "data_time": 0.008939266204833984, "loss": 0.107421875, "time": 0.984154462814331, "tflops": 4.054458500127468, "tokens_per_sec": 67.06264361307315, "iter": 2670, "memory": 16133, "step": 2670} +{"base_lr": 1.6988039433141218e-05, "lr": 1.6988039433141218e-05, "data_time": 0.008816719055175781, "loss": 1.0546875, "time": 0.9927268028259277, "tflops": 5.299840318856845, "tokens_per_sec": 87.63740411990025, "iter": 2680, "memory": 16133, "step": 2680} +{"base_lr": 1.6962300113221653e-05, "lr": 1.6962300113221653e-05, "data_time": 0.009046554565429688, "loss": 0.01495361328125, "time": 0.9669690132141113, "tflops": 4.189094934493913, "tokens_per_sec": 69.28867325047905, "iter": 2690, "memory": 16133, "step": 2690} +{"base_lr": 1.6936470960887695e-05, "lr": 1.6936470960887695e-05, "data_time": 0.008945703506469727, "loss": 0.259765625, "time": 1.0180788040161133, "tflops": 2.8497595246342353, "tokens_per_sec": 47.14762728641696, "iter": 2700, "memory": 16133, "step": 2700} +{"base_lr": 1.6910552309404933e-05, "lr": 1.6910552309404933e-05, "data_time": 0.00936126708984375, "loss": 0.0137939453125, "time": 1.0246753692626953, "tflops": 
4.307538029090775, "tokens_per_sec": 71.24207548040886, "iter": 2710, "memory": 16133, "step": 2710} +{"base_lr": 1.6884544493193754e-05, "lr": 1.6884544493193754e-05, "data_time": 0.00883936882019043, "loss": 0.255859375, "time": 0.9940416812896729, "tflops": 2.9186701354504083, "tokens_per_sec": 48.28771358729783, "iter": 2720, "memory": 16133, "step": 2720} +{"base_lr": 1.6858447847825e-05, "lr": 1.6858447847825e-05, "data_time": 0.008939504623413086, "loss": 1.1171875, "time": 0.9999771118164062, "tflops": 4.353406560476261, "tokens_per_sec": 72.00164798686617, "iter": 2730, "memory": 16133, "step": 2730} +{"base_lr": 1.6832262710015645e-05, "lr": 1.6832262710015645e-05, "data_time": 0.008743762969970703, "loss": 0.10009765625, "time": 0.9706697463989258, "tflops": 3.7367783962492696, "tokens_per_sec": 61.81299069279882, "iter": 2740, "memory": 16133, "step": 2740} +{"base_lr": 1.6805989417624473e-05, "lr": 1.6805989417624473e-05, "data_time": 0.008753299713134766, "loss": 0.006988525390625, "time": 1.3318495750427246, "tflops": 3.6776341926446134, "tokens_per_sec": 60.817679051585664, "iter": 2750, "memory": 16133, "step": 2750} +{"base_lr": 1.6779628309647667e-05, "lr": 1.6779628309647667e-05, "data_time": 0.008638858795166016, "loss": 0.2431640625, "time": 0.9679028987884521, "tflops": 2.9974905253456785, "tokens_per_sec": 49.59175146601295, "iter": 2760, "memory": 16133, "step": 2760} +{"base_lr": 1.6753179726214505e-05, "lr": 1.6753179726214505e-05, "data_time": 0.009190559387207031, "loss": 1.4453125, "time": 0.993187427520752, "tflops": 4.139438392645138, "tokens_per_sec": 68.46643253396472, "iter": 2770, "memory": 16133, "step": 2770} +{"base_lr": 1.6726644008582904e-05, "lr": 1.6726644008582904e-05, "data_time": 0.014373779296875, "loss": 1.78125, "time": 0.9902448654174805, "tflops": 4.946307431314039, "tokens_per_sec": 81.79795001084824, "iter": 2780, "memory": 16133, "step": 2780} +{"base_lr": 1.6700021499135056e-05, "lr": 1.6700021499135056e-05, 
"data_time": 0.009682655334472656, "loss": 0.05859375, "time": 1.0084917545318604, "tflops": 4.076620507899295, "tokens_per_sec": 67.42742287615233, "iter": 2790, "memory": 16133, "step": 2790} +{"base_lr": 1.6673312541372995e-05, "lr": 1.6673312541372995e-05, "data_time": 0.008786439895629883, "loss": 0.00107574462890625, "time": 1.0149304866790771, "tflops": 4.468163485396874, "tokens_per_sec": 73.89668650641414, "iter": 2800, "memory": 16133, "step": 2800} +{"base_lr": 1.66465174799142e-05, "lr": 1.66465174799142e-05, "data_time": 0.01021885871887207, "loss": 1.25, "time": 1.012080192565918, "tflops": 5.736955387451764, "tokens_per_sec": 94.8541436785925, "iter": 2810, "memory": 16133, "step": 2810} +{"base_lr": 1.6619636660487074e-05, "lr": 1.6619636660487074e-05, "data_time": 0.008965730667114258, "loss": 1.5703125, "time": 0.9998369216918945, "tflops": 4.5356150012160805, "tokens_per_sec": 75.01223286794831, "iter": 2820, "memory": 16133, "step": 2820} +{"base_lr": 1.6592670429926574e-05, "lr": 1.6592670429926574e-05, "data_time": 0.008708953857421875, "loss": 0.04638671875, "time": 1.0279772281646729, "tflops": 4.058215331438902, "tokens_per_sec": 67.1221094295288, "iter": 2830, "memory": 16133, "step": 2830} +{"base_lr": 1.656561913616965e-05, "lr": 1.656561913616965e-05, "data_time": 0.008490562438964844, "loss": 1.578125, "time": 0.9980182647705078, "tflops": 6.6674544708492505, "tokens_per_sec": 110.21842373314085, "iter": 2840, "memory": 16133, "step": 2840} +{"base_lr": 1.6538483128250825e-05, "lr": 1.6538483128250825e-05, "data_time": 0.009083271026611328, "loss": 0.09423828125, "time": 1.28802490234375, "tflops": 3.6147818066842032, "tokens_per_sec": 59.78145287395254, "iter": 2850, "memory": 16133, "step": 2850} +{"base_lr": 1.651126275629765e-05, "lr": 1.651126275629765e-05, "data_time": 0.009303092956542969, "loss": 0.1943359375, "time": 0.971623420715332, "tflops": 2.737032786788297, "tokens_per_sec": 45.28503436811031, "iter": 2860, "memory": 
16133, "step": 2860} +{"base_lr": 1.6483958371526206e-05, "lr": 1.6483958371526206e-05, "data_time": 0.008953332901000977, "loss": 0.3671875, "time": 0.9972219467163086, "tflops": 3.0306660589939294, "tokens_per_sec": 50.13928961811542, "iter": 2870, "memory": 16133, "step": 2870} +{"base_lr": 1.645657032623656e-05, "lr": 1.645657032623656e-05, "data_time": 0.008953094482421875, "loss": 1.2890625, "time": 1.2781074047088623, "tflops": 3.832272247692585, "tokens_per_sec": 63.37495558003395, "iter": 2880, "memory": 16133, "step": 2880} +{"base_lr": 1.642909897380823e-05, "lr": 1.642909897380823e-05, "data_time": 0.008804559707641602, "loss": 1.703125, "time": 0.9786138534545898, "tflops": 3.8300941671699884, "tokens_per_sec": 63.354917551055905, "iter": 2890, "memory": 16133, "step": 2890} +{"base_lr": 1.6401544668695607e-05, "lr": 1.6401544668695607e-05, "data_time": 0.00880575180053711, "loss": 1.1484375, "time": 0.978473424911499, "tflops": 5.1295500922008594, "tokens_per_sec": 84.82601355005872, "iter": 2900, "memory": 16133, "step": 2900} +{"base_lr": 1.637390776642341e-05, "lr": 1.637390776642341e-05, "data_time": 0.00871586799621582, "loss": 1.234375, "time": 0.9933192729949951, "tflops": 5.113831422268679, "tokens_per_sec": 84.56495538100636, "iter": 2910, "memory": 16133, "step": 2910} +{"base_lr": 1.6346188623582078e-05, "lr": 1.6346188623582078e-05, "data_time": 0.008947372436523438, "loss": 0.380859375, "time": 1.329230785369873, "tflops": 3.8215089256864485, "tokens_per_sec": 63.194443677109746, "iter": 2920, "memory": 16133, "step": 2920} +{"base_lr": 1.631838759782318e-05, "lr": 1.631838759782318e-05, "data_time": 0.009073019027709961, "loss": 0.287109375, "time": 0.9708282947540283, "tflops": 5.1075882478715915, "tokens_per_sec": 84.46395767718253, "iter": 2930, "memory": 16133, "step": 2930} +{"base_lr": 1.6290505047854786e-05, "lr": 1.6290505047854786e-05, "data_time": 0.008533716201782227, "loss": 1.453125, "time": 1.2384631633758545, "tflops": 
4.248243864505666, "tokens_per_sec": 70.24835503607675, "iter": 2940, "memory": 16133, "step": 2940} +{"base_lr": 1.6262541333436852e-05, "lr": 1.6262541333436852e-05, "data_time": 0.008906364440917969, "loss": 1.0078125, "time": 1.3177824020385742, "tflops": 3.4412929887219827, "tokens_per_sec": 56.91379690904969, "iter": 2950, "memory": 16133, "step": 2950} +{"base_lr": 1.6234496815376572e-05, "lr": 1.6234496815376572e-05, "data_time": 0.009525775909423828, "loss": 1.5703125, "time": 1.020555019378662, "tflops": 4.502845287217622, "tokens_per_sec": 74.46928245592885, "iter": 2960, "memory": 16133, "step": 2960} +{"base_lr": 1.6206371855523726e-05, "lr": 1.6206371855523726e-05, "data_time": 0.008729696273803711, "loss": 0.003936767578125, "time": 0.9787065982818604, "tflops": 5.252041611713346, "tokens_per_sec": 86.8493174043502, "iter": 2970, "memory": 16133, "step": 2970} +{"base_lr": 1.617816681676601e-05, "lr": 1.617816681676601e-05, "data_time": 0.00886082649230957, "loss": 0.00567626953125, "time": 0.9916579723358154, "tflops": 4.817174047998781, "tokens_per_sec": 79.66456399663548, "iter": 2980, "memory": 16133, "step": 2980} +{"base_lr": 1.6149882063024367e-05, "lr": 1.6149882063024367e-05, "data_time": 0.009074687957763672, "loss": 1.2734375, "time": 1.3574390411376953, "tflops": 4.4112057773504, "tokens_per_sec": 72.93145179981953, "iter": 2990, "memory": 16133, "step": 2990} +{"base_lr": 1.612151795924825e-05, "lr": 1.612151795924825e-05, "data_time": 0.009520292282104492, "loss": 0.130859375, "time": 1.0063247680664062, "tflops": 2.943147708428074, "tokens_per_sec": 48.69203417709962, "iter": 3000, "memory": 16133, "step": 3000} +{"base_lr": 1.6093074871410968e-05, "lr": 1.6093074871410968e-05, "data_time": 0.008962154388427734, "loss": 1.484375, "time": 1.0006539821624756, "tflops": 4.77386702494688, "tokens_per_sec": 78.94836917472423, "iter": 3010, "memory": 16133, "step": 3010} +{"base_lr": 1.6064553166504916e-05, "lr": 1.6064553166504916e-05, 
"data_time": 0.009066343307495117, "loss": 0.003143310546875, "time": 0.9957065582275391, "tflops": 3.825111249796361, "tokens_per_sec": 63.27165315862061, "iter": 3020, "memory": 16133, "step": 3020} +{"base_lr": 1.603595321253686e-05, "lr": 1.603595321253686e-05, "data_time": 0.007183551788330078, "loss": 1.0390625, "time": 0.9717576503753662, "tflops": 4.666673155640835, "tokens_per_sec": 77.17973711959165, "iter": 3030, "memory": 16133, "step": 3030} +{"base_lr": 1.6007275378523212e-05, "lr": 1.6007275378523212e-05, "data_time": 0.009069204330444336, "loss": 1.7890625, "time": 0.9727566242218018, "tflops": 6.2801665921740035, "tokens_per_sec": 103.8286427303391, "iter": 3040, "memory": 16133, "step": 3040} +{"base_lr": 1.5978520034485233e-05, "lr": 1.5978520034485233e-05, "data_time": 0.008836746215820312, "loss": 1.8515625, "time": 1.0088610649108887, "tflops": 4.795032396599126, "tokens_per_sec": 79.29734111305702, "iter": 3050, "memory": 16133, "step": 3050} +{"base_lr": 1.5949687551444268e-05, "lr": 1.5949687551444268e-05, "data_time": 0.008797883987426758, "loss": 0.310546875, "time": 0.9915597438812256, "tflops": 3.597037326089389, "tokens_per_sec": 59.50221392510247, "iter": 3060, "memory": 16133, "step": 3060} +{"base_lr": 1.592077830141697e-05, "lr": 1.592077830141697e-05, "data_time": 0.00867772102355957, "loss": 1.890625, "time": 1.0025248527526855, "tflops": 5.731230269551916, "tokens_per_sec": 94.760743076901, "iter": 3070, "memory": 16133, "step": 3070} +{"base_lr": 1.5891792657410487e-05, "lr": 1.5891792657410487e-05, "data_time": 0.008965492248535156, "loss": 1.1640625, "time": 1.0090394020080566, "tflops": 4.074407957102848, "tokens_per_sec": 67.39082722102626, "iter": 3080, "memory": 16133, "step": 3080} +{"base_lr": 1.586273099341766e-05, "lr": 1.586273099341766e-05, "data_time": 0.009026050567626953, "loss": 1.3203125, "time": 0.986558198928833, "tflops": 5.0261517205683575, "tokens_per_sec": 83.11724547923208, "iter": 3090, "memory": 16133, 
"step": 3090} +{"base_lr": 1.583359368441219e-05, "lr": 1.583359368441219e-05, "data_time": 0.008771181106567383, "loss": 0.1572265625, "time": 1.0032927989959717, "tflops": 2.8917577914209236, "tokens_per_sec": 47.842464381272684, "iter": 3100, "memory": 16133, "step": 3100} +{"base_lr": 1.5804381106343806e-05, "lr": 1.5804381106343806e-05, "data_time": 0.009276866912841797, "loss": 0.1455078125, "time": 0.9856677055358887, "tflops": 2.943466395701466, "tokens_per_sec": 48.69795340799425, "iter": 3110, "memory": 16133, "step": 3110} +{"base_lr": 1.5775093636133404e-05, "lr": 1.5775093636133404e-05, "data_time": 0.00979924201965332, "loss": 0.271484375, "time": 1.0543925762176514, "tflops": 3.2679304155850413, "tokens_per_sec": 54.05956119723267, "iter": 3120, "memory": 16133, "step": 3120} +{"base_lr": 1.5745731651668188e-05, "lr": 1.5745731651668188e-05, "data_time": 0.008790969848632812, "loss": 0.11669921875, "time": 1.2294678688049316, "tflops": 3.688486259672898, "tokens_per_sec": 61.00200086793692, "iter": 3130, "memory": 16133, "step": 3130} +{"base_lr": 1.571629553179681e-05, "lr": 1.571629553179681e-05, "data_time": 0.009827852249145508, "loss": 0.341796875, "time": 1.064455270767212, "tflops": 3.1233742211831848, "tokens_per_sec": 51.66962061290445, "iter": 3140, "memory": 16133, "step": 3140} +{"base_lr": 1.568678565632445e-05, "lr": 1.568678565632445e-05, "data_time": 0.00871729850769043, "loss": 0.154296875, "time": 0.9792685508728027, "tflops": 2.9627008505350076, "tokens_per_sec": 49.0161763667071, "iter": 3150, "memory": 16133, "step": 3150} +{"base_lr": 1.5657202406007956e-05, "lr": 1.5657202406007956e-05, "data_time": 0.009041786193847656, "loss": 1.3984375, "time": 1.3106389045715332, "tflops": 4.383893047913124, "tokens_per_sec": 72.48373268072977, "iter": 3160, "memory": 16133, "step": 3160} +{"base_lr": 1.5627546162550886e-05, "lr": 1.5627546162550886e-05, "data_time": 0.009351730346679688, "loss": 0.203125, "time": 1.012131690979004, 
"tflops": 2.687240629968313, "tokens_per_sec": 44.46061752737771, "iter": 3170, "memory": 16133, "step": 3170} +{"base_lr": 1.5597817308598624e-05, "lr": 1.5597817308598624e-05, "data_time": 0.009191751480102539, "loss": 0.1767578125, "time": 1.2985615730285645, "tflops": 2.3273803645283073, "tokens_per_sec": 38.504142613237214, "iter": 3180, "memory": 16133, "step": 3180} +{"base_lr": 1.5568016227733425e-05, "lr": 1.5568016227733425e-05, "data_time": 0.009057044982910156, "loss": 0.005950927734375, "time": 1.324871301651001, "tflops": 2.9661020357675465, "tokens_per_sec": 49.06136914502606, "iter": 3190, "memory": 16133, "step": 3190} +{"base_lr": 1.553814330446945e-05, "lr": 1.553814330446945e-05, "data_time": 0.009456157684326172, "loss": 1.3984375, "time": 1.4827933311462402, "tflops": 3.26243812155346, "tokens_per_sec": 53.95222538403503, "iter": 3200, "memory": 16133, "step": 3200} +{"base_lr": 1.550819892424782e-05, "lr": 1.550819892424782e-05, "data_time": 0.008918523788452148, "loss": 0.05517578125, "time": 0.9879844188690186, "tflops": 4.4062505802378755, "tokens_per_sec": 72.87564320330894, "iter": 3210, "memory": 16133, "step": 3210} +{"base_lr": 1.5478183473431665e-05, "lr": 1.5478183473431665e-05, "data_time": 0.008935213088989258, "loss": 0.1396484375, "time": 0.9760961532592773, "tflops": 2.97232988664629, "tokens_per_sec": 49.17548321410169, "iter": 3220, "memory": 16133, "step": 3220} +{"base_lr": 1.5448097339301097e-05, "lr": 1.5448097339301097e-05, "data_time": 0.008611202239990234, "loss": 1.3828125, "time": 1.0212745666503906, "tflops": 4.558939520855892, "tokens_per_sec": 75.39598313161923, "iter": 3230, "memory": 16133, "step": 3230} +{"base_lr": 1.5417940910048248e-05, "lr": 1.5417940910048248e-05, "data_time": 0.008686304092407227, "loss": 0.00213623046875, "time": 1.3057105541229248, "tflops": 3.148659674681471, "tokens_per_sec": 52.07892345300452, "iter": 3240, "memory": 16133, "step": 3240} +{"base_lr": 1.538771457477223e-05, "lr": 
1.538771457477223e-05, "data_time": 0.008791685104370117, "loss": 0.2392578125, "time": 1.0038917064666748, "tflops": 4.095300461283751, "tokens_per_sec": 67.73638985351015, "iter": 3250, "memory": 16133, "step": 3250} +{"base_lr": 1.5357418723474136e-05, "lr": 1.5357418723474136e-05, "data_time": 0.008708477020263672, "loss": 0.004974365234375, "time": 1.0255343914031982, "tflops": 3.713857272169106, "tokens_per_sec": 61.43138692183511, "iter": 3260, "memory": 16133, "step": 3260} +{"base_lr": 1.5327053747052013e-05, "lr": 1.5327053747052013e-05, "data_time": 0.009549617767333984, "loss": 1.109375, "time": 1.3315346240997314, "tflops": 3.9967708875540278, "tokens_per_sec": 66.08915638181914, "iter": 3270, "memory": 16133, "step": 3270} +{"base_lr": 1.5296620037295813e-05, "lr": 1.5296620037295813e-05, "data_time": 0.00923466682434082, "loss": 1.328125, "time": 1.3701045513153076, "tflops": 4.237821783528653, "tokens_per_sec": 70.06764550031559, "iter": 3280, "memory": 16133, "step": 3280} +{"base_lr": 1.5266117986882298e-05, "lr": 1.5266117986882298e-05, "data_time": 0.008839845657348633, "loss": 0.2021484375, "time": 1.018662452697754, "tflops": 2.8481267380471387, "tokens_per_sec": 47.12061377430086, "iter": 3290, "memory": 16133, "step": 3290} +{"base_lr": 1.5235547989370041e-05, "lr": 1.5235547989370041e-05, "data_time": 0.009140491485595703, "loss": 1.28125, "time": 1.328181505203247, "tflops": 2.776487744988521, "tokens_per_sec": 45.927457776653384, "iter": 3300, "memory": 16133, "step": 3300} +{"base_lr": 1.5204910439194311e-05, "lr": 1.5204910439194311e-05, "data_time": 0.008849859237670898, "loss": 1.171875, "time": 1.2976090908050537, "tflops": 3.6347296322085816, "tokens_per_sec": 60.110552979825115, "iter": 3310, "memory": 16133, "step": 3310} +{"base_lr": 1.5174205731661983e-05, "lr": 1.5174205731661983e-05, "data_time": 0.009252786636352539, "loss": 2.078125, "time": 1.0034749507904053, "tflops": 5.48442620242747, "tokens_per_sec": 90.68487452350605, 
"iter": 3320, "memory": 16133, "step": 3320} +{"base_lr": 1.5143434262946438e-05, "lr": 1.5143434262946438e-05, "data_time": 0.008826732635498047, "loss": 2.078125, "time": 1.0237200260162354, "tflops": 5.021106991369395, "tokens_per_sec": 83.03051404659044, "iter": 3330, "memory": 16133, "step": 3330} +{"base_lr": 1.5112596430082465e-05, "lr": 1.5112596430082465e-05, "data_time": 0.008801460266113281, "loss": 0.0869140625, "time": 1.0467140674591064, "tflops": 3.2341077314975704, "tokens_per_sec": 53.500761803924384, "iter": 3340, "memory": 16133, "step": 3340} +{"base_lr": 1.5081692630961124e-05, "lr": 1.5081692630961124e-05, "data_time": 0.00915670394897461, "loss": 0.15625, "time": 1.2036550045013428, "tflops": 2.711906517915904, "tokens_per_sec": 44.86335353403575, "iter": 3350, "memory": 16133, "step": 3350} +{"base_lr": 1.5050723264324618e-05, "lr": 1.5050723264324618e-05, "data_time": 0.008967876434326172, "loss": 0.1337890625, "time": 0.9809086322784424, "tflops": 3.1427316301583814, "tokens_per_sec": 51.9926100369673, "iter": 3360, "memory": 16133, "step": 3360} +{"base_lr": 1.5019688729761144e-05, "lr": 1.5019688729761144e-05, "data_time": 0.008917808532714844, "loss": 1.4140625, "time": 1.201887845993042, "tflops": 3.571703746891272, "tokens_per_sec": 59.073731577074255, "iter": 3370, "memory": 16133, "step": 3370} +{"base_lr": 1.4988589427699757e-05, "lr": 1.4988589427699757e-05, "data_time": 0.009138822555541992, "loss": 0.6328125, "time": 0.9920437335968018, "tflops": 2.8635821300028774, "tokens_per_sec": 47.37694358448004, "iter": 3380, "memory": 16133, "step": 3380} +{"base_lr": 1.495742575940516e-05, "lr": 1.495742575940516e-05, "data_time": 0.009204626083374023, "loss": 0.1318359375, "time": 1.016796588897705, "tflops": 3.6267624346715546, "tokens_per_sec": 59.9923334381651, "iter": 3390, "memory": 16133, "step": 3390} +{"base_lr": 1.492619812697257e-05, "lr": 1.492619812697257e-05, "data_time": 0.008824825286865234, "loss": 1.4453125, "time": 
0.9841251373291016, "tflops": 4.731033490500384, "tokens_per_sec": 78.24208231170522, "iter": 3400, "memory": 16133, "step": 3400} +{"base_lr": 1.489490693332252e-05, "lr": 1.489490693332252e-05, "data_time": 0.00846552848815918, "loss": 1.03125, "time": 0.9908437728881836, "tflops": 4.821132432313586, "tokens_per_sec": 79.7300262276921, "iter": 3410, "memory": 16133, "step": 3410} +{"base_lr": 1.4863552582195641e-05, "lr": 1.4863552582195641e-05, "data_time": 0.008738517761230469, "loss": 0.001007080078125, "time": 1.31965970993042, "tflops": 2.9778157470339215, "tokens_per_sec": 49.25512199154577, "iter": 3420, "memory": 16133, "step": 3420} +{"base_lr": 1.4832135478147472e-05, "lr": 1.4832135478147472e-05, "data_time": 0.008841514587402344, "loss": 1.46875, "time": 0.985058069229126, "tflops": 5.402563551949068, "tokens_per_sec": 89.33483491869325, "iter": 3430, "memory": 16133, "step": 3430} +{"base_lr": 1.4800656026543233e-05, "lr": 1.4800656026543233e-05, "data_time": 0.0086517333984375, "loss": 2.0, "time": 0.9856381416320801, "tflops": 6.259540046339592, "tokens_per_sec": 103.48625493631837, "iter": 3440, "memory": 16133, "step": 3440} +{"base_lr": 1.47691146335526e-05, "lr": 1.47691146335526e-05, "data_time": 0.009305715560913086, "loss": 1.25, "time": 0.9947657585144043, "tflops": 5.9585685794232255, "tokens_per_sec": 98.5156547268534, "iter": 3450, "memory": 16133, "step": 3450} +{"base_lr": 1.4737511706144447e-05, "lr": 1.4737511706144447e-05, "data_time": 0.009385347366333008, "loss": 1.8515625, "time": 1.0001778602600098, "tflops": 4.655101026068476, "tokens_per_sec": 76.98630719530806, "iter": 3460, "memory": 16133, "step": 3460} +{"base_lr": 1.4705847652081612e-05, "lr": 1.4705847652081612e-05, "data_time": 0.008870124816894531, "loss": 0.1357421875, "time": 0.998504638671875, "tflops": 2.9056247274245286, "tokens_per_sec": 48.071884837508016, "iter": 3470, "memory": 16133, "step": 3470} +{"base_lr": 1.467412287991563e-05, "lr": 
1.467412287991563e-05, "data_time": 0.008509159088134766, "loss": 1.1015625, "time": 1.0126943588256836, "tflops": 4.537796936762116, "tokens_per_sec": 75.04732236096807, "iter": 3480, "memory": 16133, "step": 3480} +{"base_lr": 1.4642337798981483e-05, "lr": 1.4642337798981483e-05, "data_time": 0.009043455123901367, "loss": 0.1298828125, "time": 0.9830343723297119, "tflops": 3.1974671047357432, "tokens_per_sec": 52.89743824187074, "iter": 3490, "memory": 16133, "step": 3490} +{"base_lr": 1.4610492819392272e-05, "lr": 1.4610492819392272e-05, "data_time": 0.008821249008178711, "loss": 1.2421875, "time": 1.0021586418151855, "tflops": 4.464713211236948, "tokens_per_sec": 73.84060458321424, "iter": 3500, "memory": 16133, "step": 3500} +{"base_lr": 1.4578588352033964e-05, "lr": 1.4578588352033964e-05, "data_time": 0.010370731353759766, "loss": 0.034423828125, "time": 1.3368675708770752, "tflops": 2.7131915062144962, "tokens_per_sec": 44.88103482126586, "iter": 3510, "memory": 16133, "step": 3510} +{"base_lr": 1.4546624808560078e-05, "lr": 1.4546624808560078e-05, "data_time": 0.008873224258422852, "loss": 0.228515625, "time": 1.2724571228027344, "tflops": 2.993176185759351, "tokens_per_sec": 49.51050913305878, "iter": 3520, "memory": 16133, "step": 3520} +{"base_lr": 1.4514602601386363e-05, "lr": 1.4514602601386363e-05, "data_time": 0.008640050888061523, "loss": 0.94140625, "time": 1.2463462352752686, "tflops": 3.7356625725434585, "tokens_per_sec": 61.78058537877475, "iter": 3530, "memory": 16133, "step": 3530} +{"base_lr": 1.4482522143685513e-05, "lr": 1.4482522143685513e-05, "data_time": 0.00894927978515625, "loss": 0.06005859375, "time": 1.2745084762573242, "tflops": 4.223107035704852, "tokens_per_sec": 69.83084197390697, "iter": 3540, "memory": 16133, "step": 3540} +{"base_lr": 1.4450383849381785e-05, "lr": 1.4450383849381785e-05, "data_time": 0.00927734375, "loss": 1.640625, "time": 1.0097129344940186, "tflops": 4.371369296941274, "tokens_per_sec": 72.2977764333672, 
"iter": 3550, "memory": 16133, "step": 3550} +{"base_lr": 1.4418188133145694e-05, "lr": 1.4418188133145694e-05, "data_time": 0.009581565856933594, "loss": 0.11865234375, "time": 1.4085474014282227, "tflops": 2.188589877651272, "tokens_per_sec": 36.20751417258049, "iter": 3560, "memory": 16133, "step": 3560} +{"base_lr": 1.4385935410388659e-05, "lr": 1.4385935410388659e-05, "data_time": 0.008999109268188477, "loss": 0.328125, "time": 1.0602531433105469, "tflops": 2.508236052576896, "tokens_per_sec": 41.49952327665106, "iter": 3570, "memory": 16133, "step": 3570} +{"base_lr": 1.4353626097257624e-05, "lr": 1.4353626097257624e-05, "data_time": 0.009169816970825195, "loss": 3.25, "time": 1.007554531097412, "tflops": 3.9602952535481766, "tokens_per_sec": 65.50513938739212, "iter": 3580, "memory": 16133, "step": 3580} +{"base_lr": 1.432126061062971e-05, "lr": 1.432126061062971e-05, "data_time": 0.009079456329345703, "loss": 0.48828125, "time": 1.02058744430542, "tflops": 2.6057200426557037, "tokens_per_sec": 43.112425344309344, "iter": 3590, "memory": 16133, "step": 3590} +{"base_lr": 1.428883936810682e-05, "lr": 1.428883936810682e-05, "data_time": 0.00922250747680664, "loss": 0.0027923583984375, "time": 1.0277109146118164, "tflops": 5.0605182651770795, "tokens_per_sec": 83.68111963897937, "iter": 3600, "memory": 16133, "step": 3600} +{"base_lr": 1.4256362788010269e-05, "lr": 1.4256362788010269e-05, "data_time": 0.008763551712036133, "loss": 0.034423828125, "time": 1.276289701461792, "tflops": 3.3160726186454963, "tokens_per_sec": 54.84648189182362, "iter": 3610, "memory": 16133, "step": 3610} +{"base_lr": 1.4223831289375365e-05, "lr": 1.4223831289375365e-05, "data_time": 0.008897066116333008, "loss": 2.421875, "time": 1.2333135604858398, "tflops": 3.2844242736397153, "tokens_per_sec": 54.32519526790278, "iter": 3620, "memory": 16133, "step": 3620} +{"base_lr": 1.4191245291946015e-05, "lr": 1.4191245291946015e-05, "data_time": 0.009441137313842773, "loss": 1.0234375, 
"time": 0.9884388446807861, "tflops": 4.89410297455213, "tokens_per_sec": 80.93571031777375, "iter": 3630, "memory": 16133, "step": 3630} +{"base_lr": 1.4158605216169312e-05, "lr": 1.4158605216169312e-05, "data_time": 0.009128093719482422, "loss": 0.00421142578125, "time": 1.011660099029541, "tflops": 4.1834894314164615, "tokens_per_sec": 69.19320043073752, "iter": 3640, "memory": 16133, "step": 3640} +{"base_lr": 1.41259114831901e-05, "lr": 1.41259114831901e-05, "data_time": 0.008787870407104492, "loss": 0.00653076171875, "time": 1.333857536315918, "tflops": 3.172954545180478, "tokens_per_sec": 52.47936761918804, "iter": 3650, "memory": 16133, "step": 3650} +{"base_lr": 1.4093164514845549e-05, "lr": 1.4093164514845549e-05, "data_time": 0.009134769439697266, "loss": 1.0859375, "time": 1.006432056427002, "tflops": 4.6863155672082915, "tokens_per_sec": 77.50150594053534, "iter": 3660, "memory": 16133, "step": 3660} +{"base_lr": 1.4060364733659713e-05, "lr": 1.4060364733659713e-05, "data_time": 0.009139537811279297, "loss": 0.1640625, "time": 0.9896116256713867, "tflops": 3.6041183402313397, "tokens_per_sec": 59.619348105285994, "iter": 3670, "memory": 16134, "step": 3670} +{"base_lr": 1.4027512562838062e-05, "lr": 1.4027512562838062e-05, "data_time": 0.00873255729675293, "loss": 0.1845703125, "time": 1.0156748294830322, "tflops": 3.0351570162726618, "tokens_per_sec": 50.21292102503736, "iter": 3680, "memory": 16133, "step": 3680} +{"base_lr": 1.3994608426262038e-05, "lr": 1.3994608426262038e-05, "data_time": 0.010228395462036133, "loss": 0.09423828125, "time": 0.9775562286376953, "tflops": 3.7104543267972, "tokens_per_sec": 61.37754355425011, "iter": 3690, "memory": 16133, "step": 3690} +{"base_lr": 1.3961652748483592e-05, "lr": 1.3961652748483592e-05, "data_time": 0.009495019912719727, "loss": 0.82421875, "time": 1.0172357559204102, "tflops": 5.172147660565009, "tokens_per_sec": 85.52589652257707, "iter": 3700, "memory": 16133, "step": 3700} +{"base_lr": 
1.3928645954719679e-05, "lr": 1.3928645954719679e-05, "data_time": 0.009466171264648438, "loss": 0.1337890625, "time": 1.3928873538970947, "tflops": 2.082924911663934, "tokens_per_sec": 34.46079100773553, "iter": 3710, "memory": 16133, "step": 3710} +{"base_lr": 1.3895588470846793e-05, "lr": 1.3895588470846793e-05, "data_time": 0.009355783462524414, "loss": 1.1328125, "time": 1.010042428970337, "tflops": 4.609637031083698, "tokens_per_sec": 76.23442123953103, "iter": 3720, "memory": 16133, "step": 3720} +{"base_lr": 1.3862480723395475e-05, "lr": 1.3862480723395475e-05, "data_time": 0.008955955505371094, "loss": 0.1298828125, "time": 0.9841020107269287, "tflops": 2.9481494163699122, "tokens_per_sec": 48.77543128328227, "iter": 3730, "memory": 16133, "step": 3730} +{"base_lr": 1.3829323139544796e-05, "lr": 1.3829323139544796e-05, "data_time": 0.009054899215698242, "loss": 0.361328125, "time": 1.03218412399292, "tflops": 2.9866111222789535, "tokens_per_sec": 49.40978921731644, "iter": 3740, "memory": 16133, "step": 3740} +{"base_lr": 1.3796116147116842e-05, "lr": 1.3796116147116842e-05, "data_time": 0.009134531021118164, "loss": 1.40625, "time": 1.2415947914123535, "tflops": 4.530132838406418, "tokens_per_sec": 74.9036647408408, "iter": 3750, "memory": 16133, "step": 3750} +{"base_lr": 1.3762860174571214e-05, "lr": 1.3762860174571214e-05, "data_time": 0.008994340896606445, "loss": 1.8984375, "time": 1.004883050918579, "tflops": 7.52639141547754, "tokens_per_sec": 124.39258467501384, "iter": 3760, "memory": 16133, "step": 3760} +{"base_lr": 1.372955565099949e-05, "lr": 1.372955565099949e-05, "data_time": 0.008923053741455078, "loss": 0.1865234375, "time": 1.3465838432312012, "tflops": 2.0647204898927107, "tokens_per_sec": 34.16051680048851, "iter": 3770, "memory": 16133, "step": 3770} +{"base_lr": 1.3696203006119672e-05, "lr": 1.3696203006119672e-05, "data_time": 0.00899505615234375, "loss": 0.00872802734375, "time": 1.0058338642120361, "tflops": 3.7264436458048436, 
"tokens_per_sec": 61.640398286359904, "iter": 3780, "memory": 16133, "step": 3780} +{"base_lr": 1.3662802670270674e-05, "lr": 1.3662802670270674e-05, "data_time": 0.008962392807006836, "loss": 2.0, "time": 1.2341103553771973, "tflops": 4.557606466929228, "tokens_per_sec": 75.35792856344669, "iter": 3790, "memory": 16133, "step": 3790} +{"base_lr": 1.3629355074406739e-05, "lr": 1.3629355074406739e-05, "data_time": 0.00923776626586914, "loss": 1.3671875, "time": 1.3311965465545654, "tflops": 2.9520084582010067, "tokens_per_sec": 48.82825167191555, "iter": 3800, "memory": 16133, "step": 3800} +{"base_lr": 1.35958606500919e-05, "lr": 1.35958606500919e-05, "data_time": 0.00881505012512207, "loss": 0.2138671875, "time": 1.0090088844299316, "tflops": 2.8753758399385005, "tokens_per_sec": 47.5714344448725, "iter": 3810, "memory": 16133, "step": 3810} +{"base_lr": 1.3562319829494396e-05, "lr": 1.3562319829494396e-05, "data_time": 0.009497642517089844, "loss": 1.2265625, "time": 0.9851703643798828, "tflops": 5.401947737963513, "tokens_per_sec": 89.32465204158107, "iter": 3820, "memory": 16133, "step": 3820} +{"base_lr": 1.3528733045381118e-05, "lr": 1.3528733045381118e-05, "data_time": 0.008884429931640625, "loss": 0.0019989013671875, "time": 0.9838395118713379, "tflops": 3.8097506420522387, "tokens_per_sec": 63.0184082381569, "iter": 3830, "memory": 16133, "step": 3830} +{"base_lr": 1.3495100731111991e-05, "lr": 1.3495100731111991e-05, "data_time": 0.00897836685180664, "loss": 1.15625, "time": 1.2703604698181152, "tflops": 4.475221318340848, "tokens_per_sec": 73.99474576958815, "iter": 3840, "memory": 16133, "step": 3840} +{"base_lr": 1.346142332063441e-05, "lr": 1.346142332063441e-05, "data_time": 0.009162425994873047, "loss": 0.06201171875, "time": 1.0033824443817139, "tflops": 4.700558834550458, "tokens_per_sec": 77.73705872239573, "iter": 3850, "memory": 16133, "step": 3850} +{"base_lr": 1.3427701248477623e-05, "lr": 1.3427701248477623e-05, "data_time": 
0.009229421615600586, "loss": 1.53125, "time": 0.9922597408294678, "tflops": 4.1433084497967405, "tokens_per_sec": 68.53044339286373, "iter": 3860, "memory": 16133, "step": 3860} +{"base_lr": 1.3393934949747153e-05, "lr": 1.3393934949747153e-05, "data_time": 0.008948326110839844, "loss": 1.4765625, "time": 1.2091026306152344, "tflops": 4.201187874351976, "tokens_per_sec": 69.4730107047971, "iter": 3870, "memory": 16133, "step": 3870} +{"base_lr": 1.3360124860119159e-05, "lr": 1.3360124860119159e-05, "data_time": 0.008706092834472656, "loss": 0.0255126953125, "time": 1.1507222652435303, "tflops": 3.520158701553598, "tokens_per_sec": 58.2243014006185, "iter": 3880, "memory": 16133, "step": 3880} +{"base_lr": 1.3326271415834807e-05, "lr": 1.3326271415834807e-05, "data_time": 0.008657693862915039, "loss": 0.19140625, "time": 1.152904987335205, "tflops": 2.8312826190247957, "tokens_per_sec": 46.8382048765071, "iter": 3890, "memory": 16133, "step": 3890} +{"base_lr": 1.3292375053694664e-05, "lr": 1.3292375053694664e-05, "data_time": 0.0090789794921875, "loss": 0.1435546875, "time": 0.981360673904419, "tflops": 3.4494820795786807, "tokens_per_sec": 57.06362756227293, "iter": 3900, "memory": 16133, "step": 3900} +{"base_lr": 1.3258436211053052e-05, "lr": 1.3258436211053052e-05, "data_time": 0.009073495864868164, "loss": 1.0546875, "time": 1.2440695762634277, "tflops": 3.5965439660594782, "tokens_per_sec": 59.48220373831508, "iter": 3910, "memory": 16133, "step": 3910} +{"base_lr": 1.3224455325812411e-05, "lr": 1.3224455325812411e-05, "data_time": 0.00881052017211914, "loss": 0.2275390625, "time": 0.9861588478088379, "tflops": 3.1873364776199935, "tokens_per_sec": 52.729841764830184, "iter": 3920, "memory": 16133, "step": 3920} +{"base_lr": 1.319043283641764e-05, "lr": 1.319043283641764e-05, "data_time": 0.009159326553344727, "loss": 0.9296875, "time": 1.0149836540222168, "tflops": 5.064325676049185, "tokens_per_sec": 83.74519103147615, "iter": 3930, "memory": 16133, 
"step": 3930} +{"base_lr": 1.3156369181850437e-05, "lr": 1.3156369181850437e-05, "data_time": 0.00910806655883789, "loss": 1.015625, "time": 1.0018503665924072, "tflops": 4.0432435123477415, "tokens_per_sec": 66.87625441294209, "iter": 3940, "memory": 16133, "step": 3940} +{"base_lr": 1.3122264801623653e-05, "lr": 1.3122264801623653e-05, "data_time": 0.009016990661621094, "loss": 0.01165771484375, "time": 1.0115966796875, "tflops": 4.123929063306735, "tokens_per_sec": 68.20900205133839, "iter": 3950, "memory": 16133, "step": 3950} +{"base_lr": 1.30881201357756e-05, "lr": 1.30881201357756e-05, "data_time": 0.009587764739990234, "loss": 0.138671875, "time": 1.0010924339294434, "tflops": 2.898113770758538, "tokens_per_sec": 47.947620392599106, "iter": 3960, "memory": 16133, "step": 3960} +{"base_lr": 1.3053935624864385e-05, "lr": 1.3053935624864385e-05, "data_time": 0.009809017181396484, "loss": 0.94921875, "time": 1.0106091499328613, "tflops": 4.427380187647045, "tokens_per_sec": 73.22316446951116, "iter": 3970, "memory": 16133, "step": 3970} +{"base_lr": 1.3019711709962229e-05, "lr": 1.3019711709962229e-05, "data_time": 0.009257078170776367, "loss": 0.03271484375, "time": 0.9830799102783203, "tflops": 4.797634621620864, "tokens_per_sec": 79.34248191262299, "iter": 3980, "memory": 16133, "step": 3980} +{"base_lr": 1.2985448832649766e-05, "lr": 1.2985448832649766e-05, "data_time": 0.008626461029052734, "loss": 0.0142822265625, "time": 1.0226378440856934, "tflops": 4.020228854573343, "tokens_per_sec": 66.49470327467692, "iter": 3990, "memory": 16133, "step": 3990} +{"base_lr": 1.2951147435010356e-05, "lr": 1.2951147435010356e-05, "data_time": 0.008900642395019531, "loss": 1.8046875, "time": 1.0054302215576172, "tflops": 4.089033809032278, "tokens_per_sec": 67.63273924130354, "iter": 4000, "memory": 16133, "step": 4000} +{"base_lr": 1.2916807959624364e-05, "lr": 1.2916807959624364e-05, "data_time": 0.009252786636352539, "loss": 1.296875, "time": 1.0037798881530762, 
"tflops": 5.543083785534008, "tokens_per_sec": 91.65355979505176, "iter": 4010, "memory": 16133, "step": 4010} +{"base_lr": 1.2882430849563464e-05, "lr": 1.2882430849563464e-05, "data_time": 0.008749246597290039, "loss": 1.0859375, "time": 0.9890079498291016, "tflops": 4.707675994264583, "tokens_per_sec": 77.85579480248624, "iter": 4020, "memory": 16133, "step": 4020} +{"base_lr": 1.2848016548384929e-05, "lr": 1.2848016548384929e-05, "data_time": 0.008770227432250977, "loss": 1.0859375, "time": 0.9968030452728271, "tflops": 5.399648143805008, "tokens_per_sec": 89.28544151422734, "iter": 4030, "memory": 16133, "step": 4030} +{"base_lr": 1.2813565500125892e-05, "lr": 1.2813565500125892e-05, "data_time": 0.009089469909667969, "loss": 1.390625, "time": 0.9872255325317383, "tflops": 4.470942024009726, "tokens_per_sec": 73.94460292443782, "iter": 4040, "memory": 16133, "step": 4040} +{"base_lr": 1.2779078149297606e-05, "lr": 1.2779078149297606e-05, "data_time": 0.00900125503540039, "loss": 0.66015625, "time": 0.9902074337005615, "tflops": 3.7852505287913893, "tokens_per_sec": 62.613143357481775, "iter": 4050, "memory": 16133, "step": 4050} +{"base_lr": 1.2744554940879755e-05, "lr": 1.2744554940879755e-05, "data_time": 0.009060144424438477, "loss": 0.17578125, "time": 0.9813299179077148, "tflops": 3.0181108116867876, "tokens_per_sec": 49.9322390011532, "iter": 4060, "memory": 16133, "step": 4060} +{"base_lr": 1.2709996320314655e-05, "lr": 1.2709996320314655e-05, "data_time": 0.00896763801574707, "loss": 0.17578125, "time": 0.9897778034210205, "tflops": 2.9923508335225484, "tokens_per_sec": 49.5060606841144, "iter": 4070, "memory": 16133, "step": 4070} +{"base_lr": 1.2675402733501543e-05, "lr": 1.2675402733501543e-05, "data_time": 0.009120941162109375, "loss": 0.04345703125, "time": 0.9844691753387451, "tflops": 3.930234897417405, "tokens_per_sec": 65.00965353020148, "iter": 4080, "memory": 16133, "step": 4080} +{"base_lr": 1.2640774626790823e-05, "lr": 
1.2640774626790823e-05, "data_time": 0.009131669998168945, "loss": 0.126953125, "time": 0.9973714351654053, "tflops": 4.304100931228068, "tokens_per_sec": 71.1871199601321, "iter": 4090, "memory": 16133, "step": 4090} +{"base_lr": 1.2606112446978292e-05, "lr": 1.2606112446978292e-05, "data_time": 0.009125709533691406, "loss": 1.5, "time": 0.9915773868560791, "tflops": 4.512356763251712, "tokens_per_sec": 74.62856755391698, "iter": 4100, "memory": 16133, "step": 4100} +{"base_lr": 1.2571416641299383e-05, "lr": 1.2571416641299383e-05, "data_time": 0.009391069412231445, "loss": 1.6484375, "time": 1.011131763458252, "tflops": 4.185675384186027, "tokens_per_sec": 69.22935519354887, "iter": 4110, "memory": 16133, "step": 4110} +{"base_lr": 1.2536687657423391e-05, "lr": 1.2536687657423391e-05, "data_time": 0.009079217910766602, "loss": 0.00994873046875, "time": 0.9986855983734131, "tflops": 4.843888304581704, "tokens_per_sec": 80.10529052408297, "iter": 4120, "memory": 16133, "step": 4120} +{"base_lr": 1.2501925943447699e-05, "lr": 1.2501925943447699e-05, "data_time": 0.009150981903076172, "loss": 0.05322265625, "time": 1.012336015701294, "tflops": 4.06113988320754, "tokens_per_sec": 67.17137288929304, "iter": 4130, "memory": 16133, "step": 4130} +{"base_lr": 1.2467131947892006e-05, "lr": 1.2467131947892006e-05, "data_time": 0.010026216506958008, "loss": 0.004241943359375, "time": 1.0613949298858643, "tflops": 4.50067068752739, "tokens_per_sec": 74.43035365584488, "iter": 4140, "memory": 16133, "step": 4140} +{"base_lr": 1.243230611969251e-05, "lr": 1.243230611969251e-05, "data_time": 0.009114265441894531, "loss": 1.6328125, "time": 1.0191013813018799, "tflops": 4.628055951946801, "tokens_per_sec": 76.5380181315034, "iter": 4150, "memory": 16133, "step": 4150} +{"base_lr": 1.2397448908196162e-05, "lr": 1.2397448908196162e-05, "data_time": 0.008903741836547852, "loss": 1.609375, "time": 1.6103813648223877, "tflops": 4.282575536825313, "tokens_per_sec": 70.79068504527963, 
"iter": 4160, "memory": 16136, "step": 4160} +{"base_lr": 1.2362560763154815e-05, "lr": 1.2362560763154815e-05, "data_time": 0.008901834487915039, "loss": 0.0257568359375, "time": 1.0015063285827637, "tflops": 5.313834440668632, "tokens_per_sec": 87.86764245857671, "iter": 4170, "memory": 16133, "step": 4170} +{"base_lr": 1.2327642134719464e-05, "lr": 1.2327642134719464e-05, "data_time": 0.009127616882324219, "loss": 1.3984375, "time": 1.2856106758117676, "tflops": 3.1037494494026143, "tokens_per_sec": 51.33747038797306, "iter": 4180, "memory": 16133, "step": 4180} +{"base_lr": 1.229269347343442e-05, "lr": 1.229269347343442e-05, "data_time": 0.008862495422363281, "loss": 2.296875, "time": 1.3148260116577148, "tflops": 4.646298141039297, "tokens_per_sec": 76.81624724824522, "iter": 4190, "memory": 16133, "step": 4190} +{"base_lr": 1.22577152302315e-05, "lr": 1.22577152302315e-05, "data_time": 0.009611845016479492, "loss": 0.007049560546875, "time": 1.008434772491455, "tflops": 4.436926462594263, "tokens_per_sec": 73.3810475585853, "iter": 4200, "memory": 16133, "step": 4200} +{"base_lr": 1.2222707856424208e-05, "lr": 1.2222707856424208e-05, "data_time": 0.009228229522705078, "loss": 1.71875, "time": 1.0133864879608154, "tflops": 5.55029044041086, "tokens_per_sec": 91.77150189464955, "iter": 4210, "memory": 16133, "step": 4210} +{"base_lr": 1.2187671803701902e-05, "lr": 1.2187671803701902e-05, "data_time": 0.009535789489746094, "loss": 0.201171875, "time": 1.2284510135650635, "tflops": 3.642270532940041, "tokens_per_sec": 60.2384622445675, "iter": 4220, "memory": 16133, "step": 4220} +{"base_lr": 1.2152607524123983e-05, "lr": 1.2152607524123983e-05, "data_time": 0.009298086166381836, "loss": 0.875, "time": 0.9663305282592773, "tflops": 5.006073334585817, "tokens_per_sec": 82.78740830430674, "iter": 4230, "memory": 16133, "step": 4230} +{"base_lr": 1.2117515470114048e-05, "lr": 1.2117515470114048e-05, "data_time": 0.009401321411132812, "loss": 1.2421875, "time": 
0.9997000694274902, "tflops": 3.6282659661248093, "tokens_per_sec": 60.018001233410814, "iter": 4240, "memory": 16133, "step": 4240} +{"base_lr": 1.2082396094454069e-05, "lr": 1.2082396094454069e-05, "data_time": 0.009211063385009766, "loss": 1.2109375, "time": 1.0195677280426025, "tflops": 4.0323345428604815, "tokens_per_sec": 66.6949317143274, "iter": 4250, "memory": 16133, "step": 4250} +{"base_lr": 1.2047249850278538e-05, "lr": 1.2047249850278538e-05, "data_time": 0.009305953979492188, "loss": 1.4609375, "time": 1.031794548034668, "tflops": 4.219160614096897, "tokens_per_sec": 69.7813340234, "iter": 4260, "memory": 16133, "step": 4260} +{"base_lr": 1.201207719106862e-05, "lr": 1.201207719106862e-05, "data_time": 0.00943899154663086, "loss": 0.1552734375, "time": 0.9861104488372803, "tflops": 3.3715210666683055, "tokens_per_sec": 55.774685345637046, "iter": 4270, "memory": 16133, "step": 4270} +{"base_lr": 1.1976878570646298e-05, "lr": 1.1976878570646298e-05, "data_time": 0.009577751159667969, "loss": 0.1416015625, "time": 1.2193880081176758, "tflops": 2.379291701459504, "tokens_per_sec": 39.36400856857405, "iter": 4280, "memory": 16133, "step": 4280} +{"base_lr": 1.1941654443168541e-05, "lr": 1.1941654443168541e-05, "data_time": 0.00910806655883789, "loss": 0.99609375, "time": 1.0118343830108643, "tflops": 4.960424879055674, "tokens_per_sec": 82.02923461934459, "iter": 4290, "memory": 16133, "step": 4290} +{"base_lr": 1.190640526312141e-05, "lr": 1.190640526312141e-05, "data_time": 0.009569883346557617, "loss": 0.1435546875, "time": 1.3656890392303467, "tflops": 2.1244073030043005, "tokens_per_sec": 35.14709324094446, "iter": 4300, "memory": 16133, "step": 4300} +{"base_lr": 1.1871131485314226e-05, "lr": 1.1871131485314226e-05, "data_time": 0.00888204574584961, "loss": 0.77734375, "time": 0.9838275909423828, "tflops": 5.163168178401214, "tokens_per_sec": 85.38081344054726, "iter": 4310, "memory": 16133, "step": 4310} +{"base_lr": 1.1835833564873684e-05, "lr": 
1.1835833564873684e-05, "data_time": 0.009006977081298828, "loss": 1.15625, "time": 0.9757585525512695, "tflops": 4.647538398652238, "tokens_per_sec": 76.86327709229317, "iter": 4320, "memory": 16133, "step": 4320} +{"base_lr": 1.1800511957237978e-05, "lr": 1.1800511957237978e-05, "data_time": 0.009151697158813477, "loss": 1.125, "time": 0.9888310432434082, "tflops": 3.668147114737654, "tokens_per_sec": 60.677706681959286, "iter": 4330, "memory": 16133, "step": 4330} +{"base_lr": 1.1765167118150939e-05, "lr": 1.1765167118150939e-05, "data_time": 0.009471893310546875, "loss": 0.984375, "time": 0.996485710144043, "tflops": 2.850817306048984, "tokens_per_sec": 47.16575413124483, "iter": 4340, "memory": 16133, "step": 4340} +{"base_lr": 1.172979950365613e-05, "lr": 1.172979950365613e-05, "data_time": 0.009004831314086914, "loss": 1.1015625, "time": 0.9955077171325684, "tflops": 5.163403247782491, "tokens_per_sec": 85.38356713571861, "iter": 4350, "memory": 16133, "step": 4350} +{"base_lr": 1.1694409570091004e-05, "lr": 1.1694409570091004e-05, "data_time": 0.009333610534667969, "loss": 1.1640625, "time": 0.9829099178314209, "tflops": 3.8133537407962534, "tokens_per_sec": 63.07800834559343, "iter": 4360, "memory": 16133, "step": 4360} +{"base_lr": 1.1658997774080974e-05, "lr": 1.1658997774080974e-05, "data_time": 0.009241342544555664, "loss": 1.171875, "time": 1.3417644500732422, "tflops": 4.643296957995326, "tokens_per_sec": 76.76459157514631, "iter": 4370, "memory": 16133, "step": 4370} +{"base_lr": 1.1623564572533535e-05, "lr": 1.1623564572533535e-05, "data_time": 0.009459972381591797, "loss": 0.1279296875, "time": 1.0478911399841309, "tflops": 3.461407010552134, "tokens_per_sec": 57.25785600291589, "iter": 4380, "memory": 16133, "step": 4380} +{"base_lr": 1.1588110422632374e-05, "lr": 1.1588110422632374e-05, "data_time": 0.009249210357666016, "loss": 1.109375, "time": 1.0080795288085938, "tflops": 4.378452289164265, "tokens_per_sec": 72.41492155505148, "iter": 4390, 
"memory": 16133, "step": 4390} +{"base_lr": 1.1552635781831467e-05, "lr": 1.1552635781831467e-05, "data_time": 0.009271383285522461, "loss": 0.045166015625, "time": 1.1679422855377197, "tflops": 3.4682578456379645, "tokens_per_sec": 57.36584832108881, "iter": 4400, "memory": 16133, "step": 4400} +{"base_lr": 1.1517141107849187e-05, "lr": 1.1517141107849187e-05, "data_time": 0.00890660285949707, "loss": 0.1474609375, "time": 0.9980387687683105, "tflops": 2.906981030560156, "tokens_per_sec": 48.09432409042504, "iter": 4410, "memory": 16133, "step": 4410} +{"base_lr": 1.1481626858662382e-05, "lr": 1.1481626858662382e-05, "data_time": 0.009180068969726562, "loss": 2.59375, "time": 0.9995388984680176, "tflops": 5.44544511363878, "tokens_per_sec": 90.04151828193179, "iter": 4420, "memory": 16133, "step": 4420} +{"base_lr": 1.144609349250047e-05, "lr": 1.144609349250047e-05, "data_time": 0.008986473083496094, "loss": 0.1875, "time": 1.002075433731079, "tflops": 2.6538572540468284, "tokens_per_sec": 43.90887004996083, "iter": 4430, "memory": 16133, "step": 4430} +{"base_lr": 1.1410541467839533e-05, "lr": 1.1410541467839533e-05, "data_time": 0.007593631744384766, "loss": 0.0234375, "time": 1.341017723083496, "tflops": 3.2462709805944168, "tokens_per_sec": 53.69056557611458, "iter": 4440, "memory": 16133, "step": 4440} +{"base_lr": 1.1374971243396397e-05, "lr": 1.1374971243396397e-05, "data_time": 0.010661125183105469, "loss": 0.07666015625, "time": 1.0108795166015625, "tflops": 3.887410319844281, "tokens_per_sec": 64.30044227076311, "iter": 4450, "memory": 16133, "step": 4450} +{"base_lr": 1.1339383278122715e-05, "lr": 1.1339383278122715e-05, "data_time": 0.008971929550170898, "loss": 1.203125, "time": 1.0368342399597168, "tflops": 4.7240487899257415, "tokens_per_sec": 78.1224200341502, "iter": 4460, "memory": 16133, "step": 4460} +{"base_lr": 1.130377803119904e-05, "lr": 1.130377803119904e-05, "data_time": 0.009309530258178711, "loss": 1.2578125, "time": 
0.9963138103485107, "tflops": 5.280759415937754, "tokens_per_sec": 87.32188502885458, "iter": 4470, "memory": 16133, "step": 4470} +{"base_lr": 1.1268155962028914e-05, "lr": 1.1268155962028914e-05, "data_time": 0.0089874267578125, "loss": 1.3984375, "time": 1.6541407108306885, "tflops": 3.6932007136735105, "tokens_per_sec": 61.058892595188, "iter": 4480, "memory": 16133, "step": 4480} +{"base_lr": 1.123251753023293e-05, "lr": 1.123251753023293e-05, "data_time": 0.009059667587280273, "loss": 1.15625, "time": 0.9944314956665039, "tflops": 6.508748321418948, "tokens_per_sec": 107.59916642440729, "iter": 4490, "memory": 16133, "step": 4490} +{"base_lr": 1.1196863195642791e-05, "lr": 1.1196863195642791e-05, "data_time": 0.008948087692260742, "loss": 0.0035552978515625, "time": 1.2272980213165283, "tflops": 3.1526125204686175, "tokens_per_sec": 52.14707339892453, "iter": 4500, "memory": 16133, "step": 4500} +{"base_lr": 1.1161193418295398e-05, "lr": 1.1161193418295398e-05, "data_time": 0.00943446159362793, "loss": 0.138671875, "time": 1.2930543422698975, "tflops": 2.337292879657515, "tokens_per_sec": 38.668135101103815, "iter": 4510, "memory": 16133, "step": 4510} +{"base_lr": 1.1125508658426907e-05, "lr": 1.1125508658426907e-05, "data_time": 0.009380817413330078, "loss": 0.1396484375, "time": 0.9910192489624023, "tflops": 5.247879756281161, "tokens_per_sec": 86.77934368071585, "iter": 4520, "memory": 16133, "step": 4520} +{"base_lr": 1.108980937646679e-05, "lr": 1.108980937646679e-05, "data_time": 0.009600400924682617, "loss": 0.1435546875, "time": 1.2192633152008057, "tflops": 2.2803271596995573, "tokens_per_sec": 37.72769952681332, "iter": 4530, "memory": 16133, "step": 4530} +{"base_lr": 1.1054096033031876e-05, "lr": 1.1054096033031876e-05, "data_time": 0.009891748428344727, "loss": 1.359375, "time": 1.2980077266693115, "tflops": 3.3072124569643253, "tokens_per_sec": 54.699212139615945, "iter": 4540, "memory": 16133, "step": 4540} +{"base_lr": 1.1018369088920448e-05, 
"lr": 1.1018369088920448e-05, "data_time": 0.008825063705444336, "loss": 1.1953125, "time": 1.003819227218628, "tflops": 3.975031877255497, "tokens_per_sec": 65.74889004946276, "iter": 4550, "memory": 16133, "step": 4550} +{"base_lr": 1.0982629005106262e-05, "lr": 1.0982629005106262e-05, "data_time": 0.008906126022338867, "loss": 1.65625, "time": 0.9924688339233398, "tflops": 4.87423012649361, "tokens_per_sec": 80.60706519485402, "iter": 4560, "memory": 16133, "step": 4560} +{"base_lr": 1.0946876242732608e-05, "lr": 1.0946876242732608e-05, "data_time": 0.008928060531616211, "loss": 0.1259765625, "time": 1.3468871116638184, "tflops": 2.4235140597620606, "tokens_per_sec": 40.092446896498515, "iter": 4570, "memory": 16133, "step": 4570} +{"base_lr": 1.0911111263106378e-05, "lr": 1.0911111263106378e-05, "data_time": 0.00874018669128418, "loss": 0.00775146484375, "time": 0.9942820072174072, "tflops": 4.013160650832835, "tokens_per_sec": 66.37955783253175, "iter": 4580, "memory": 16133, "step": 4580} +{"base_lr": 1.0875334527692089e-05, "lr": 1.0875334527692089e-05, "data_time": 0.009366750717163086, "loss": 0.045654296875, "time": 0.9909460544586182, "tflops": 3.782429119288392, "tokens_per_sec": 62.56647344320856, "iter": 4590, "memory": 16133, "step": 4590} +{"base_lr": 1.0839546498105935e-05, "lr": 1.0839546498105935e-05, "data_time": 0.008839845657348633, "loss": 1.5, "time": 0.9813961982727051, "tflops": 6.286596004350249, "tokens_per_sec": 103.9335593304926, "iter": 4600, "memory": 16133, "step": 4600} diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/config.py b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/config.py new file mode 100644 index 0000000000000000000000000000000000000000..b2393a84fb0d3988d229c7a3fcbf3d579f748c00 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/config.py @@ -0,0 +1,261 @@ +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 
0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a/iter_7452.pth' +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + 
pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-05, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] 
+per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' +save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2b_medium.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + 
type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), + num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b' diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/eval_outputs_iter_4095.txt b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/eval_outputs_iter_4095.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e524c8ac2accdbb9c6904de2dfbc8f5317fddf3 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/eval_outputs_iter_4095.txt @@ -0,0 +1,24 @@ +Eval output 1: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern. This pattern is characterized by the formation of distinct lobules, each containing a cluster of tumor cells. The lobules are separated by fibrous stroma, contributing to the overall architectural organization of the tumor.<|im_end|> + + +Eval output 2: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and nuclear atypia. The tumor exhibits a high degree of pleomorphism, with marked nuclear atypia and prominent nucleoli. 
The presence of microvascular invasion and lymphovascular invasion is noted, along with the involvement of the perineural space. The tumor is characterized by a high mitotic rate, with a high Ki-67 proliferation index, reflecting a high-grade malignancy. The absence of specific molecular markers such as EGFR, HER2, and p53 further supports the aggressive nature of the tumor.<|im_end|> + + +Eval output 3: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + + diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/eval_outputs_iter_4602.txt b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/eval_outputs_iter_4602.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce19fecc11149806b324ea042b99e72ff99a2c6b --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/eval_outputs_iter_4602.txt @@ -0,0 +1,24 @@ +Eval output 1: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern, which is a characteristic feature of papillary thyroid carcinoma. This pattern is evident in the slide, reflecting the glandular architecture typical of this type of cancer.<|im_end|> + + +Eval output 2: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and nuclear atypia. 
The tumor is characterized by glandular structures with irregular shapes and sizes, and there is marked nuclear pleomorphism. The presence of microvascular invasion and lymphovascular invasion is noted, suggesting aggressive tumor behavior. The tumor cells exhibit high mitotic activity, indicating a high proliferation rate. The absence of lymphovascular invasion and perineural invasion is a positive finding, but the presence of vascular invasion is concerning. The tumor is poorly differentiated, reflecting a high degree of malignancy. The absence of lymphovascular invasion and perineural invasion is a positive finding, but the presence of vascular invasion is concerning.<|im_end|> + + +Eval output 3: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + + diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/scalars.json b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/scalars.json new file mode 100644 index 0000000000000000000000000000000000000000..dd927faedb8264f20025a135c31b3210aba678e0 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/20250925_230352/vis_data/scalars.json @@ -0,0 +1,460 @@ +{"base_lr": 5.882352941176473e-07, "lr": 5.882352941176473e-07, "data_time": 0.008524894714355469, "loss": 0.21875, "time": 0.9872832298278809, "tflops": 4.164193257175627, "tokens_per_sec": 68.87587872001633, "iter": 10, "memory": 15742, "step": 10} +{"base_lr": 1.0196078431372559e-06, "lr": 1.0196078431372559e-06, "data_time": 0.008862972259521484, "loss": 1.390625, "time": 1.0128672122955322, "tflops": 3.8797814928276297, "tokens_per_sec": 64.17425622122938, "iter": 
20, "memory": 15742, "step": 20} +{"base_lr": 1.4509803921568641e-06, "lr": 1.4509803921568641e-06, "data_time": 0.008648395538330078, "loss": 0.69140625, "time": 1.0541915893554688, "tflops": 2.8095105908405205, "tokens_per_sec": 46.481114528633306, "iter": 30, "memory": 15742, "step": 30} +{"base_lr": 1.8823529411764717e-06, "lr": 1.8823529411764717e-06, "data_time": 0.009172916412353516, "loss": 0.318359375, "time": 1.074812650680542, "tflops": 2.3617301920128884, "tokens_per_sec": 39.076577646688165, "iter": 40, "memory": 15742, "step": 40} +{"base_lr": 2.31372549019608e-06, "lr": 2.31372549019608e-06, "data_time": 0.009110689163208008, "loss": 0.0673828125, "time": 1.0274059772491455, "tflops": 3.8837747838296752, "tokens_per_sec": 64.23945495883638, "iter": 50, "memory": 15743, "step": 50} +{"base_lr": 2.7450980392156876e-06, "lr": 2.7450980392156876e-06, "data_time": 0.0076406002044677734, "loss": 1.5703125, "time": 0.9788403511047363, "tflops": 5.251323950870456, "tokens_per_sec": 86.83744995185444, "iter": 60, "memory": 15742, "step": 60} +{"base_lr": 3.176470588235296e-06, "lr": 3.176470588235296e-06, "data_time": 0.008695602416992188, "loss": 2.046875, "time": 0.9778482913970947, "tflops": 5.318565160331918, "tokens_per_sec": 87.94820296412246, "iter": 70, "memory": 16133, "step": 70} +{"base_lr": 3.607843137254902e-06, "lr": 3.607843137254902e-06, "data_time": 0.008966684341430664, "loss": 0.08203125, "time": 0.9880132675170898, "tflops": 4.773679026825057, "tokens_per_sec": 78.94630827775993, "iter": 80, "memory": 16133, "step": 80} +{"base_lr": 4.0392156862745096e-06, "lr": 4.0392156862745096e-06, "data_time": 0.00854039192199707, "loss": 1.2265625, "time": 1.202319860458374, "tflops": 4.375951615090016, "tokens_per_sec": 72.36011219739783, "iter": 90, "memory": 16133, "step": 90} +{"base_lr": 4.470588235294117e-06, "lr": 4.470588235294117e-06, "data_time": 0.00881052017211914, "loss": 1.015625, "time": 1.040710210800171, "tflops": 4.4737996564554905, 
"tokens_per_sec": 73.98793554713278, "iter": 100, "memory": 16133, "step": 100} +{"base_lr": 4.901960784313724e-06, "lr": 4.901960784313724e-06, "data_time": 0.00935053825378418, "loss": 1.984375, "time": 0.9840950965881348, "tflops": 4.8541945441949705, "tokens_per_sec": 80.27679466528522, "iter": 110, "memory": 16133, "step": 110} +{"base_lr": 5.333333333333329e-06, "lr": 5.333333333333329e-06, "data_time": 0.00858759880065918, "loss": 0.275390625, "time": 1.0231690406799316, "tflops": 3.308530578668718, "tokens_per_sec": 54.73191405667562, "iter": 120, "memory": 16133, "step": 120} +{"base_lr": 5.7647058823529375e-06, "lr": 5.7647058823529375e-06, "data_time": 0.008618593215942383, "loss": 1.578125, "time": 1.0206077098846436, "tflops": 4.621225342207601, "tokens_per_sec": 76.42505464586358, "iter": 130, "memory": 16133, "step": 130} +{"base_lr": 6.196078431372542e-06, "lr": 6.196078431372542e-06, "data_time": 0.00869297981262207, "loss": 0.0869140625, "time": 1.3174896240234375, "tflops": 3.2123739385149777, "tokens_per_sec": 53.13134822737822, "iter": 140, "memory": 16133, "step": 140} +{"base_lr": 6.62745098039215e-06, "lr": 6.62745098039215e-06, "data_time": 0.009142637252807617, "loss": 1.515625, "time": 0.9733965396881104, "tflops": 4.783178071538082, "tokens_per_sec": 79.10445215327431, "iter": 150, "memory": 16133, "step": 150} +{"base_lr": 7.058823529411758e-06, "lr": 7.058823529411758e-06, "data_time": 0.009024381637573242, "loss": 0.09521484375, "time": 1.3030588626861572, "tflops": 2.505028702464441, "tokens_per_sec": 41.44095216745745, "iter": 160, "memory": 16133, "step": 160} +{"base_lr": 7.4901960784313645e-06, "lr": 7.4901960784313645e-06, "data_time": 0.009188175201416016, "loss": 1.3359375, "time": 0.9896304607391357, "tflops": 4.46007706480148, "tokens_per_sec": 73.7649081106537, "iter": 170, "memory": 16133, "step": 170} +{"base_lr": 7.921568627450973e-06, "lr": 7.921568627450973e-06, "data_time": 0.008887767791748047, "loss": 0.10107421875, 
"time": 1.0033645629882812, "tflops": 4.519668631000321, "tokens_per_sec": 74.74850395010533, "iter": 180, "memory": 16133, "step": 180} +{"base_lr": 8.35294117647058e-06, "lr": 8.35294117647058e-06, "data_time": 0.008906126022338867, "loss": 2.03125, "time": 1.0010943412780762, "tflops": 5.920900506742169, "tokens_per_sec": 97.89287178947346, "iter": 190, "memory": 16133, "step": 190} +{"base_lr": 8.784313725490186e-06, "lr": 8.784313725490186e-06, "data_time": 0.009262800216674805, "loss": 0.018798828125, "time": 0.9854555130004883, "tflops": 6.014863090594988, "tokens_per_sec": 99.44639682567994, "iter": 200, "memory": 16133, "step": 200} +{"base_lr": 9.215686274509796e-06, "lr": 9.215686274509796e-06, "data_time": 0.00849294662475586, "loss": 0.24609375, "time": 1.0007083415985107, "tflops": 2.8992261261044914, "tokens_per_sec": 47.966023670071365, "iter": 210, "memory": 16133, "step": 210} +{"base_lr": 9.647058823529404e-06, "lr": 9.647058823529404e-06, "data_time": 0.008557796478271484, "loss": 0.60546875, "time": 1.041980266571045, "tflops": 3.306858757630567, "tokens_per_sec": 54.70353117868656, "iter": 220, "memory": 16133, "step": 220} +{"base_lr": 1.0078431372549012e-05, "lr": 1.0078431372549012e-05, "data_time": 0.008387565612792969, "loss": 1.4921875, "time": 0.9833061695098877, "tflops": 4.365667028221028, "tokens_per_sec": 72.20538444838249, "iter": 230, "memory": 16133, "step": 230} +{"base_lr": 1.0509803921568623e-05, "lr": 1.0509803921568623e-05, "data_time": 0.009382009506225586, "loss": 0.2333984375, "time": 1.0031208992004395, "tflops": 2.8922533374448074, "tokens_per_sec": 47.85066290435345, "iter": 240, "memory": 16133, "step": 240} +{"base_lr": 1.094117647058823e-05, "lr": 1.094117647058823e-05, "data_time": 0.008673906326293945, "loss": 0.060546875, "time": 0.9933228492736816, "tflops": 4.199795616055937, "tokens_per_sec": 69.46382039875895, "iter": 250, "memory": 16133, "step": 250} +{"base_lr": 1.1372549019607831e-05, "lr": 
1.1372549019607831e-05, "data_time": 0.008640050888061523, "loss": 1.6796875, "time": 1.219836950302124, "tflops": 4.759864760480628, "tokens_per_sec": 78.69904250411862, "iter": 260, "memory": 16133, "step": 260} +{"base_lr": 1.1803921568627442e-05, "lr": 1.1803921568627442e-05, "data_time": 0.009214401245117188, "loss": 0.7421875, "time": 1.002384901046753, "tflops": 2.8340398032159366, "tokens_per_sec": 46.8881763391216, "iter": 270, "memory": 16133, "step": 270} +{"base_lr": 1.2235294117647047e-05, "lr": 1.2235294117647047e-05, "data_time": 0.008814096450805664, "loss": 0.1767578125, "time": 1.2698991298675537, "tflops": 2.284653718029013, "tokens_per_sec": 37.79827773011266, "iter": 280, "memory": 16133, "step": 280} +{"base_lr": 1.2666666666666648e-05, "lr": 1.2666666666666648e-05, "data_time": 0.008632898330688477, "loss": 1.9375, "time": 0.9910604953765869, "tflops": 3.049507796240577, "tokens_per_sec": 50.45100701037464, "iter": 290, "memory": 16133, "step": 290} +{"base_lr": 1.3098039215686255e-05, "lr": 1.3098039215686255e-05, "data_time": 0.00885772705078125, "loss": 0.05322265625, "time": 1.0014095306396484, "tflops": 4.891161294926447, "tokens_per_sec": 80.8859887204993, "iter": 300, "memory": 16133, "step": 300} +{"base_lr": 1.352941176470586e-05, "lr": 1.352941176470586e-05, "data_time": 0.008812665939331055, "loss": 1.8203125, "time": 1.002932071685791, "tflops": 5.3666503097259435, "tokens_per_sec": 88.7398085199474, "iter": 310, "memory": 16133, "step": 310} +{"base_lr": 1.3960784313725465e-05, "lr": 1.3960784313725465e-05, "data_time": 0.008949041366577148, "loss": 3.640625, "time": 1.2422642707824707, "tflops": 3.163339361343473, "tokens_per_sec": 52.32381026221242, "iter": 320, "memory": 16133, "step": 320} +{"base_lr": 1.4392156862745072e-05, "lr": 1.4392156862745072e-05, "data_time": 0.008502960205078125, "loss": 0.2158203125, "time": 0.9795055389404297, "tflops": 2.9619840350381974, "tokens_per_sec": 49.00431706784886, "iter": 330, 
"memory": 16133, "step": 330} +{"base_lr": 1.4823529411764678e-05, "lr": 1.4823529411764678e-05, "data_time": 0.008882761001586914, "loss": 1.484375, "time": 0.9972810745239258, "tflops": 3.9404171655738987, "tokens_per_sec": 65.17721198205231, "iter": 340, "memory": 16133, "step": 340} +{"base_lr": 1.5254901960784286e-05, "lr": 1.5254901960784286e-05, "data_time": 0.0091705322265625, "loss": 0.7890625, "time": 0.9842259883880615, "tflops": 2.8863276739539225, "tokens_per_sec": 47.75326048535618, "iter": 350, "memory": 16133, "step": 350} +{"base_lr": 1.5686274509803884e-05, "lr": 1.5686274509803884e-05, "data_time": 0.00878143310546875, "loss": 1.7578125, "time": 1.0178942680358887, "tflops": 5.049844508657472, "tokens_per_sec": 83.50572615359258, "iter": 360, "memory": 16133, "step": 360} +{"base_lr": 1.6117647058823492e-05, "lr": 1.6117647058823492e-05, "data_time": 0.00905752182006836, "loss": 1.4765625, "time": 1.0026702880859375, "tflops": 5.971995990922202, "tokens_per_sec": 98.73634551282935, "iter": 370, "memory": 16133, "step": 370} +{"base_lr": 1.6549019607843093e-05, "lr": 1.6549019607843093e-05, "data_time": 0.00919795036315918, "loss": 0.0252685546875, "time": 0.9891290664672852, "tflops": 3.3000747452371644, "tokens_per_sec": 54.593482115340734, "iter": 380, "memory": 16133, "step": 380} +{"base_lr": 1.698039215686269e-05, "lr": 1.698039215686269e-05, "data_time": 0.009583234786987305, "loss": 0.029296875, "time": 1.0506329536437988, "tflops": 3.682727726109483, "tokens_per_sec": 60.91566020081006, "iter": 390, "memory": 16133, "step": 390} +{"base_lr": 1.7411764705882296e-05, "lr": 1.7411764705882296e-05, "data_time": 0.008723020553588867, "loss": 0.10595703125, "time": 1.0348429679870605, "tflops": 2.978937558954903, "tokens_per_sec": 49.282839597542115, "iter": 400, "memory": 16133, "step": 400} +{"base_lr": 1.7843137254901907e-05, "lr": 1.7843137254901907e-05, "data_time": 0.008978605270385742, "loss": 0.84375, "time": 0.9999411106109619, 
"tflops": 3.0829141358868366, "tokens_per_sec": 51.00300353566632, "iter": 410, "memory": 16133, "step": 410} +{"base_lr": 1.8274509803921515e-05, "lr": 1.8274509803921515e-05, "data_time": 0.008895158767700195, "loss": 0.03564453125, "time": 1.0184733867645264, "tflops": 4.214923412493927, "tokens_per_sec": 69.71217993774212, "iter": 420, "memory": 16133, "step": 420} +{"base_lr": 1.870588235294112e-05, "lr": 1.870588235294112e-05, "data_time": 0.009042739868164062, "loss": 1.4375, "time": 1.2031400203704834, "tflops": 3.668590559570265, "tokens_per_sec": 60.67456718583794, "iter": 430, "memory": 16133, "step": 430} +{"base_lr": 1.9137254901960735e-05, "lr": 1.9137254901960735e-05, "data_time": 0.008719682693481445, "loss": 0.0390625, "time": 0.9837861061096191, "tflops": 4.240508096017895, "tokens_per_sec": 70.13719707100792, "iter": 440, "memory": 16133, "step": 440} +{"base_lr": 1.9568627450980336e-05, "lr": 1.9568627450980336e-05, "data_time": 0.008626937866210938, "loss": 0.31640625, "time": 1.000011920928955, "tflops": 2.9617271285303546, "tokens_per_sec": 48.999415881395436, "iter": 450, "memory": 16133, "step": 450} +{"base_lr": 1.999999999999995e-05, "lr": 1.999999999999995e-05, "data_time": 0.009011507034301758, "loss": 0.03662109375, "time": 1.2423996925354004, "tflops": 3.65009374039839, "tokens_per_sec": 60.36704649120203, "iter": 460, "memory": 16133, "step": 460} +{"base_lr": 1.9999947744086436e-05, "lr": 1.9999947744086436e-05, "data_time": 0.008683919906616211, "loss": 1.40625, "time": 1.0127382278442383, "tflops": 5.1353347901021, "tokens_per_sec": 84.91829145521511, "iter": 470, "memory": 16133, "step": 470} +{"base_lr": 1.999976710706184e-05, "lr": 1.999976710706184e-05, "data_time": 0.009174585342407227, "loss": 3.4375, "time": 0.9958086013793945, "tflops": 4.250083125023432, "tokens_per_sec": 70.29463282699675, "iter": 480, "memory": 16133, "step": 480} +{"base_lr": 1.999945744612158e-05, "lr": 1.999945744612158e-05, "data_time": 
0.009193897247314453, "loss": 0.0250244140625, "time": 0.9861748218536377, "tflops": 4.352967879271975, "tokens_per_sec": 71.9953485189114, "iter": 490, "memory": 16133, "step": 490} +{"base_lr": 1.999901876526112e-05, "lr": 1.999901876526112e-05, "data_time": 0.008847951889038086, "loss": 0.03759765625, "time": 1.0194499492645264, "tflops": 3.8547291780152584, "tokens_per_sec": 63.759873691523495, "iter": 500, "memory": 16133, "step": 500} +{"base_lr": 1.999845107014062e-05, "lr": 1.999845107014062e-05, "data_time": 0.009894609451293945, "loss": 0.017822265625, "time": 0.988703727722168, "tflops": 4.035802956249438, "tokens_per_sec": 66.75407217487468, "iter": 510, "memory": 16133, "step": 510} +{"base_lr": 1.9997754368084873e-05, "lr": 1.9997754368084873e-05, "data_time": 0.009016036987304688, "loss": 1.4453125, "time": 0.9823880195617676, "tflops": 4.431351800126319, "tokens_per_sec": 73.29079606655333, "iter": 520, "memory": 16133, "step": 520} +{"base_lr": 1.999692866808322e-05, "lr": 1.999692866808322e-05, "data_time": 0.007185459136962891, "loss": 1.90625, "time": 0.9557437896728516, "tflops": 4.364928124864526, "tokens_per_sec": 72.19508067485985, "iter": 530, "memory": 16133, "step": 530} +{"base_lr": 1.99959739807894e-05, "lr": 1.99959739807894e-05, "data_time": 0.009478330612182617, "loss": 0.04736328125, "time": 0.9996399879455566, "tflops": 4.112718796915313, "tokens_per_sec": 68.02448963619837, "iter": 540, "memory": 16133, "step": 540} +{"base_lr": 1.999489031852146e-05, "lr": 1.999489031852146e-05, "data_time": 0.009806156158447266, "loss": 0.734375, "time": 1.0192034244537354, "tflops": 5.162162340699301, "tokens_per_sec": 85.36078069649757, "iter": 550, "memory": 16133, "step": 550} +{"base_lr": 1.9993677695261562e-05, "lr": 1.9993677695261562e-05, "data_time": 0.008526802062988281, "loss": 1.4609375, "time": 1.0077121257781982, "tflops": 5.581543768987468, "tokens_per_sec": 92.28826132075085, "iter": 560, "memory": 16133, "step": 560} 
+{"base_lr": 1.9992336126655814e-05, "lr": 1.9992336126655814e-05, "data_time": 0.008981466293334961, "loss": 1.1953125, "time": 1.056100606918335, "tflops": 4.236671107427695, "tokens_per_sec": 70.06908197492601, "iter": 570, "memory": 16133, "step": 570} +{"base_lr": 1.9990865630014053e-05, "lr": 1.9990865630014053e-05, "data_time": 0.008991003036499023, "loss": 0.035400390625, "time": 1.3262734413146973, "tflops": 3.0998420389822074, "tokens_per_sec": 51.271478325421526, "iter": 580, "memory": 16133, "step": 580} +{"base_lr": 1.9989266224309644e-05, "lr": 1.9989266224309644e-05, "data_time": 0.009540319442749023, "loss": 0.12060546875, "time": 1.029627799987793, "tflops": 3.7578580419003007, "tokens_per_sec": 62.15838383607805, "iter": 590, "memory": 16133, "step": 590} +{"base_lr": 1.9987537930179225e-05, "lr": 1.9987537930179225e-05, "data_time": 0.009036779403686523, "loss": 1.03125, "time": 0.9971165657043457, "tflops": 4.608690214778731, "tokens_per_sec": 76.21977471233637, "iter": 600, "memory": 16133, "step": 600} +{"base_lr": 1.9985680769922452e-05, "lr": 1.9985680769922452e-05, "data_time": 0.00832056999206543, "loss": 1.5546875, "time": 0.9819443225860596, "tflops": 3.940340627621436, "tokens_per_sec": 65.17681148294, "iter": 610, "memory": 16133, "step": 610} +{"base_lr": 1.9983694767501683e-05, "lr": 1.9983694767501683e-05, "data_time": 0.009741067886352539, "loss": 0.10693359375, "time": 1.0000543594360352, "tflops": 3.2035350150160613, "tokens_per_sec": 52.997119106440884, "iter": 620, "memory": 16133, "step": 620} +{"base_lr": 1.9981579948541705e-05, "lr": 1.9981579948541705e-05, "data_time": 0.00917816162109375, "loss": 1.0234375, "time": 0.9978580474853516, "tflops": 2.846896625094218, "tokens_per_sec": 47.10088786515785, "iter": 630, "memory": 16133, "step": 630} +{"base_lr": 1.9979336340329377e-05, "lr": 1.9979336340329377e-05, "data_time": 0.008788347244262695, "loss": 1.21875, "time": 1.2734053134918213, "tflops": 4.226765552272326, 
"tokens_per_sec": 69.89133707623854, "iter": 640, "memory": 16133, "step": 640} +{"base_lr": 1.9976963971813275e-05, "lr": 1.9976963971813275e-05, "data_time": 0.008724689483642578, "loss": 0.0198974609375, "time": 0.9978935718536377, "tflops": 3.7560951565864933, "tokens_per_sec": 62.130874222157516, "iter": 650, "memory": 16133, "step": 650} +{"base_lr": 1.9974462873603337e-05, "lr": 1.9974462873603337e-05, "data_time": 0.009305715560913086, "loss": 0.255859375, "time": 0.9893627166748047, "tflops": 3.360438084316289, "tokens_per_sec": 55.59134084291803, "iter": 660, "memory": 16133, "step": 660} +{"base_lr": 1.9971833077970475e-05, "lr": 1.9971833077970475e-05, "data_time": 0.008910894393920898, "loss": 0.01513671875, "time": 1.0174455642700195, "tflops": 3.862323059785078, "tokens_per_sec": 63.885481722622934, "iter": 670, "memory": 16133, "step": 670} +{"base_lr": 1.9969074618846124e-05, "lr": 1.9969074618846124e-05, "data_time": 0.008722543716430664, "loss": 1.7421875, "time": 0.9885158538818359, "tflops": 5.07743849252507, "tokens_per_sec": 83.96425780525477, "iter": 680, "memory": 16133, "step": 680} +{"base_lr": 1.9966187531821836e-05, "lr": 1.9966187531821836e-05, "data_time": 0.008638620376586914, "loss": 0.002197265625, "time": 0.9926965236663818, "tflops": 4.446301578925365, "tokens_per_sec": 73.53707629629996, "iter": 690, "memory": 16133, "step": 690} +{"base_lr": 1.9963171854148803e-05, "lr": 1.9963171854148803e-05, "data_time": 0.008794307708740234, "loss": 0.265625, "time": 1.0040645599365234, "tflops": 2.5281417043839784, "tokens_per_sec": 41.82997954097034, "iter": 700, "memory": 16133, "step": 700} +{"base_lr": 1.9960027624737383e-05, "lr": 1.9960027624737383e-05, "data_time": 0.009371280670166016, "loss": 0.0810546875, "time": 1.011732578277588, "tflops": 3.3459296763970214, "tokens_per_sec": 55.35059481358323, "iter": 710, "memory": 16133, "step": 710} +{"base_lr": 1.995675488415661e-05, "lr": 1.995675488415661e-05, "data_time": 
0.009412765502929688, "loss": 0.1962890625, "time": 1.0025129318237305, "tflops": 2.894007325466831, "tokens_per_sec": 47.87968162428836, "iter": 720, "memory": 16133, "step": 720} +{"base_lr": 1.9953353674633637e-05, "lr": 1.9953353674633637e-05, "data_time": 0.008684158325195312, "loss": 0.197265625, "time": 1.0114426612854004, "tflops": 4.423731664809659, "tokens_per_sec": 73.16282260220595, "iter": 730, "memory": 16133, "step": 730} +{"base_lr": 1.9949824040053233e-05, "lr": 1.9949824040053233e-05, "data_time": 0.009159564971923828, "loss": 0.06787109375, "time": 1.0097177028656006, "tflops": 3.2926947233678874, "tokens_per_sec": 54.47067021193581, "iter": 740, "memory": 16133, "step": 740} +{"base_lr": 1.9946166025957175e-05, "lr": 1.9946166025957175e-05, "data_time": 0.00884699821472168, "loss": 0.05859375, "time": 1.0112392902374268, "tflops": 3.8261914323361914, "tokens_per_sec": 63.288680154931754, "iter": 750, "memory": 16133, "step": 750} +{"base_lr": 1.9942379679543698e-05, "lr": 1.9942379679543698e-05, "data_time": 0.008803844451904297, "loss": 1.328125, "time": 0.9931695461273193, "tflops": 4.992693551994335, "tokens_per_sec": 82.56394924679401, "iter": 760, "memory": 16133, "step": 760} +{"base_lr": 1.9938465049666864e-05, "lr": 1.9938465049666864e-05, "data_time": 0.008560895919799805, "loss": 0.1357421875, "time": 0.9753861427307129, "tflops": 2.9744935277125606, "tokens_per_sec": 49.21127940732161, "iter": 770, "memory": 16133, "step": 770} +{"base_lr": 1.9934422186835943e-05, "lr": 1.9934422186835943e-05, "data_time": 0.009826421737670898, "loss": 0.043701171875, "time": 0.990581750869751, "tflops": 4.028151562142633, "tokens_per_sec": 66.62751453071292, "iter": 780, "memory": 16133, "step": 780} +{"base_lr": 1.993025114321474e-05, "lr": 1.993025114321474e-05, "data_time": 0.009480953216552734, "loss": 0.046630859375, "time": 1.2005980014801025, "tflops": 3.7771804843966157, "tokens_per_sec": 62.46886960287889, "iter": 790, "memory": 16133, 
"step": 790} +{"base_lr": 1.992595197262094e-05, "lr": 1.992595197262094e-05, "data_time": 0.00901937484741211, "loss": 0.4609375, "time": 0.9970369338989258, "tflops": 3.213230170766735, "tokens_per_sec": 53.15750921351495, "iter": 800, "memory": 16133, "step": 800} +{"base_lr": 1.9921524730525433e-05, "lr": 1.9921524730525433e-05, "data_time": 0.00908207893371582, "loss": 0.01483154296875, "time": 1.0055177211761475, "tflops": 5.894853832746864, "tokens_per_sec": 97.46223058632181, "iter": 810, "memory": 16133, "step": 810} +{"base_lr": 1.9916969474051568e-05, "lr": 1.9916969474051568e-05, "data_time": 0.008567333221435547, "loss": 0.0169677734375, "time": 1.2569775581359863, "tflops": 2.9819014570055837, "tokens_per_sec": 49.324667412433776, "iter": 820, "memory": 16133, "step": 820} +{"base_lr": 1.9912286261974386e-05, "lr": 1.9912286261974386e-05, "data_time": 0.008791923522949219, "loss": 0.00848388671875, "time": 0.9821064472198486, "tflops": 4.864023713875271, "tokens_per_sec": 80.43934567739895, "iter": 830, "memory": 16133, "step": 830} +{"base_lr": 1.9907475154719963e-05, "lr": 1.9907475154719963e-05, "data_time": 0.009096384048461914, "loss": 0.01068115234375, "time": 0.9947891235351562, "tflops": 4.436948511144007, "tokens_per_sec": 73.38238655093897, "iter": 840, "memory": 16133, "step": 840} +{"base_lr": 1.9902536214364513e-05, "lr": 1.9902536214364513e-05, "data_time": 0.00857853889465332, "loss": 1.2734375, "time": 0.9982521533966064, "tflops": 4.724716292695462, "tokens_per_sec": 78.13657073969006, "iter": 850, "memory": 16133, "step": 850} +{"base_lr": 1.9897469504633664e-05, "lr": 1.9897469504633664e-05, "data_time": 0.008818387985229492, "loss": 1.5859375, "time": 1.0080606937408447, "tflops": 6.180392537923417, "tokens_per_sec": 102.17638743325247, "iter": 860, "memory": 16133, "step": 860} +{"base_lr": 1.9892275090901615e-05, "lr": 1.9892275090901615e-05, "data_time": 0.0088653564453125, "loss": 0.53125, "time": 0.9986014366149902, "tflops": 
2.9053430750187075, "tokens_per_sec": 48.06722506094119, "iter": 870, "memory": 16133, "step": 870} +{"base_lr": 1.9886953040190286e-05, "lr": 1.9886953040190286e-05, "data_time": 0.009137153625488281, "loss": 1.9765625, "time": 1.2483093738555908, "tflops": 2.6148965315764743, "tokens_per_sec": 43.25850717051786, "iter": 880, "memory": 16133, "step": 880} +{"base_lr": 1.988150342116846e-05, "lr": 1.988150342116846e-05, "data_time": 0.008737325668334961, "loss": 0.0223388671875, "time": 1.3624787330627441, "tflops": 2.662190352200015, "tokens_per_sec": 44.03738461669836, "iter": 890, "memory": 16133, "step": 890} +{"base_lr": 1.9875926304150895e-05, "lr": 1.9875926304150895e-05, "data_time": 0.008765220642089844, "loss": 1.609375, "time": 0.9669051170349121, "tflops": 5.316144975628551, "tokens_per_sec": 87.90934963770907, "iter": 900, "memory": 16133, "step": 900} +{"base_lr": 1.9870221761097415e-05, "lr": 1.9870221761097415e-05, "data_time": 0.009374618530273438, "loss": 1.390625, "time": 0.9999887943267822, "tflops": 4.534926157696055, "tokens_per_sec": 75.00084043483398, "iter": 910, "memory": 16133, "step": 910} +{"base_lr": 1.986438986561198e-05, "lr": 1.986438986561198e-05, "data_time": 0.009124517440795898, "loss": 1.3828125, "time": 1.0255787372589111, "tflops": 4.303743788964113, "tokens_per_sec": 71.17932280366661, "iter": 920, "memory": 16133, "step": 920} +{"base_lr": 1.985843069294174e-05, "lr": 1.985843069294174e-05, "data_time": 0.009217500686645508, "loss": 2.1875, "time": 1.2584664821624756, "tflops": 3.7477821461464464, "tokens_per_sec": 61.9801966166849, "iter": 930, "memory": 16133, "step": 930} +{"base_lr": 1.9852344319976067e-05, "lr": 1.9852344319976067e-05, "data_time": 0.0091094970703125, "loss": 1.140625, "time": 0.9903407096862793, "tflops": 2.685303282899226, "tokens_per_sec": 44.429154097779055, "iter": 940, "memory": 16133, "step": 940} +{"base_lr": 1.984613082524555e-05, "lr": 1.984613082524555e-05, "data_time": 0.0087890625, "loss": 
1.640625, "time": 0.9890744686126709, "tflops": 4.9521605218516305, "tokens_per_sec": 81.89474359148413, "iter": 950, "memory": 16133, "step": 950} +{"base_lr": 1.9839790288921006e-05, "lr": 1.9839790288921006e-05, "data_time": 0.009489297866821289, "loss": 0.0247802734375, "time": 1.383854627609253, "tflops": 3.9331690641229264, "tokens_per_sec": 65.03573294791732, "iter": 960, "memory": 16133, "step": 960} +{"base_lr": 1.983332279281242e-05, "lr": 1.983332279281242e-05, "data_time": 0.009531259536743164, "loss": 0.98828125, "time": 1.0031132698059082, "tflops": 5.063902017336285, "tokens_per_sec": 83.73929697507577, "iter": 970, "memory": 16133, "step": 970} +{"base_lr": 1.982672842036791e-05, "lr": 1.982672842036791e-05, "data_time": 0.00927734375, "loss": 0.08740234375, "time": 0.9879059791564941, "tflops": 3.9165620919226813, "tokens_per_sec": 64.78349291354677, "iter": 980, "memory": 16133, "step": 980} +{"base_lr": 1.9820007256672626e-05, "lr": 1.9820007256672626e-05, "data_time": 0.009133100509643555, "loss": 1.15625, "time": 0.9826757907867432, "tflops": 5.169219958619294, "tokens_per_sec": 85.48088880124239, "iter": 990, "memory": 16133, "step": 990} +{"base_lr": 1.9813159388447673e-05, "lr": 1.9813159388447673e-05, "data_time": 0.008687496185302734, "loss": 1.3359375, "time": 1.0025537014007568, "tflops": 5.066728399176701, "tokens_per_sec": 83.78603548373752, "iter": 1000, "memory": 16133, "step": 1000} +{"base_lr": 1.980618490404899e-05, "lr": 1.980618490404899e-05, "data_time": 0.00909733772277832, "loss": 1.6640625, "time": 1.3062586784362793, "tflops": 3.5179872373040473, "tokens_per_sec": 58.18143163720169, "iter": 1010, "memory": 16133, "step": 1010} +{"base_lr": 1.979908389346619e-05, "lr": 1.979908389346619e-05, "data_time": 0.009068012237548828, "loss": 0.06396484375, "time": 0.9959368705749512, "tflops": 3.8849802860641574, "tokens_per_sec": 64.26110117098963, "iter": 1020, "memory": 16133, "step": 1020} +{"base_lr": 1.979185644832144e-05, 
"lr": 1.979185644832144e-05, "data_time": 0.00920867919921875, "loss": 1.3828125, "time": 0.9904766082763672, "tflops": 5.495267798425173, "tokens_per_sec": 90.86534628670094, "iter": 1030, "memory": 16133, "step": 1030} +{"base_lr": 1.9784502661868234e-05, "lr": 1.9784502661868234e-05, "data_time": 0.00901484489440918, "loss": 0.0157470703125, "time": 0.9842207431793213, "tflops": 2.9477937634204587, "tokens_per_sec": 48.76954721041255, "iter": 1040, "memory": 16133, "step": 1040} +{"base_lr": 1.9777022628990215e-05, "lr": 1.9777022628990215e-05, "data_time": 0.008942842483520508, "loss": 0.09228515625, "time": 1.0177338123321533, "tflops": 4.753228625498831, "tokens_per_sec": 78.60601566985389, "iter": 1050, "memory": 16133, "step": 1050} +{"base_lr": 1.976941644619995e-05, "lr": 1.976941644619995e-05, "data_time": 0.008702278137207031, "loss": 0.515625, "time": 1.3479816913604736, "tflops": 2.152313927680784, "tokens_per_sec": 35.60879224666588, "iter": 1060, "memory": 16133, "step": 1060} +{"base_lr": 1.9761684211637654e-05, "lr": 1.9761684211637654e-05, "data_time": 0.009236335754394531, "loss": 1.40625, "time": 1.0147202014923096, "tflops": 3.9919625026788315, "tokens_per_sec": 66.02805374466742, "iter": 1070, "memory": 16133, "step": 1070} +{"base_lr": 1.9753826025069985e-05, "lr": 1.9753826025069985e-05, "data_time": 0.009155988693237305, "loss": 0.51953125, "time": 1.016932487487793, "tflops": 3.8047708731447525, "tokens_per_sec": 62.93436465781638, "iter": 1080, "memory": 16133, "step": 1080} +{"base_lr": 1.9745841987888698e-05, "lr": 1.9745841987888698e-05, "data_time": 0.009380340576171875, "loss": 0.01055908203125, "time": 1.2443771362304688, "tflops": 3.2552229361425025, "tokens_per_sec": 53.84219787491917, "iter": 1090, "memory": 16133, "step": 1090} +{"base_lr": 1.973773220310936e-05, "lr": 1.973773220310936e-05, "data_time": 0.00909113883972168, "loss": 1.21875, "time": 0.991276741027832, "tflops": 4.574782351993165, "tokens_per_sec": 
75.66000178936768, "iter": 1100, "memory": 16133, "step": 1100} +{"base_lr": 1.972949677537004e-05, "lr": 1.972949677537004e-05, "data_time": 0.009260416030883789, "loss": 0.1640625, "time": 0.9921650886535645, "tflops": 4.204696371013364, "tokens_per_sec": 69.5448779532932, "iter": 1110, "memory": 16133, "step": 1110} +{"base_lr": 1.9721135810929925e-05, "lr": 1.9721135810929925e-05, "data_time": 0.008704423904418945, "loss": 0.6640625, "time": 0.9859991073608398, "tflops": 2.8197989549088756, "tokens_per_sec": 46.65318625194153, "iter": 1120, "memory": 16133, "step": 1120} +{"base_lr": 1.9712649417667957e-05, "lr": 1.9712649417667957e-05, "data_time": 0.009140968322753906, "loss": 1.171875, "time": 0.9946060180664062, "tflops": 4.80289578190372, "tokens_per_sec": 79.42843554627078, "iter": 1130, "memory": 16133, "step": 1130} +{"base_lr": 1.9704037705081455e-05, "lr": 1.9704037705081455e-05, "data_time": 0.009226560592651367, "loss": 1.3203125, "time": 1.2457458972930908, "tflops": 3.8346416064529625, "tokens_per_sec": 63.415821935755474, "iter": 1140, "memory": 16133, "step": 1140} +{"base_lr": 1.9695300784284703e-05, "lr": 1.9695300784284703e-05, "data_time": 0.008873462677001953, "loss": 1.15625, "time": 1.3674635887145996, "tflops": 3.6703927537041428, "tokens_per_sec": 60.69631446491264, "iter": 1150, "memory": 16133, "step": 1150} +{"base_lr": 1.96864387680075e-05, "lr": 1.96864387680075e-05, "data_time": 0.008931636810302734, "loss": 1.7734375, "time": 1.319000005722046, "tflops": 3.3922296500819664, "tokens_per_sec": 56.103108172038915, "iter": 1160, "memory": 16133, "step": 1160} +{"base_lr": 1.9677451770593687e-05, "lr": 1.9677451770593687e-05, "data_time": 0.009285926818847656, "loss": 1.4609375, "time": 1.008923053741455, "tflops": 4.074877814840242, "tokens_per_sec": 67.39859868179616, "iter": 1170, "memory": 16133, "step": 1170} +{"base_lr": 1.9668339907999766e-05, "lr": 1.9668339907999766e-05, "data_time": 0.009534120559692383, "loss": 
0.0654296875, "time": 1.3686044216156006, "tflops": 2.8271098991348373, "tokens_per_sec": 46.76296451271359, "iter": 1180, "memory": 16133, "step": 1180} +{"base_lr": 1.9659103297793265e-05, "lr": 1.9659103297793265e-05, "data_time": 0.009553194046020508, "loss": 0.0966796875, "time": 0.9937186241149902, "tflops": 4.076329955751324, "tokens_per_sec": 67.42351242496139, "iter": 1190, "memory": 16133, "step": 1190} +{"base_lr": 1.9649742059151334e-05, "lr": 1.9649742059151334e-05, "data_time": 0.010024785995483398, "loss": 0.1435546875, "time": 1.3382084369659424, "tflops": 3.1174164147158594, "tokens_per_sec": 51.56147435177506, "iter": 1200, "memory": 16133, "step": 1200} +{"base_lr": 1.9640256312859143e-05, "lr": 1.9640256312859143e-05, "data_time": 0.009113311767578125, "loss": 0.06494140625, "time": 1.2687287330627441, "tflops": 3.860601095406822, "tokens_per_sec": 63.84343468315725, "iter": 1210, "memory": 16133, "step": 1210} +{"base_lr": 1.963064618130835e-05, "lr": 1.963064618130835e-05, "data_time": 0.009511232376098633, "loss": 0.1416015625, "time": 1.0045092105865479, "tflops": 3.430214012449507, "tokens_per_sec": 56.744128773752216, "iter": 1220, "memory": 16133, "step": 1220} +{"base_lr": 1.9620911788495544e-05, "lr": 1.9620911788495544e-05, "data_time": 0.008709430694580078, "loss": 0.26953125, "time": 1.3086321353912354, "tflops": 2.3094700378071824, "tokens_per_sec": 38.20783446144973, "iter": 1230, "memory": 16133, "step": 1230} +{"base_lr": 1.9611053260020586e-05, "lr": 1.9611053260020586e-05, "data_time": 0.00928044319152832, "loss": 2.203125, "time": 1.002645492553711, "tflops": 3.919334893679352, "tokens_per_sec": 64.82849669466117, "iter": 1240, "memory": 16133, "step": 1240} +{"base_lr": 1.9601070723085036e-05, "lr": 1.9601070723085036e-05, "data_time": 0.008513212203979492, "loss": 0.1796875, "time": 0.9911537170410156, "tflops": 3.110246707402825, "tokens_per_sec": 51.45518714514197, "iter": 1250, "memory": 16133, "step": 1250} +{"base_lr": 
1.9590964306490494e-05, "lr": 1.9590964306490494e-05, "data_time": 0.008801460266113281, "loss": 1.734375, "time": 1.024923324584961, "tflops": 4.7789482580257125, "tokens_per_sec": 79.03030212793881, "iter": 1260, "memory": 16133, "step": 1260} +{"base_lr": 1.958073414063693e-05, "lr": 1.958073414063693e-05, "data_time": 0.008933067321777344, "loss": 0.2138671875, "time": 1.0364642143249512, "tflops": 2.5658050921145192, "tokens_per_sec": 42.45202042852462, "iter": 1270, "memory": 16133, "step": 1270} +{"base_lr": 1.9570380357521033e-05, "lr": 1.9570380357521033e-05, "data_time": 0.009038209915161133, "loss": 0.10986328125, "time": 1.3490376472473145, "tflops": 3.3615632225312355, "tokens_per_sec": 55.59518680073938, "iter": 1280, "memory": 16133, "step": 1280} +{"base_lr": 1.9559903090734485e-05, "lr": 1.9559903090734485e-05, "data_time": 0.007570505142211914, "loss": 1.0703125, "time": 1.0035035610198975, "tflops": 3.9159835775392584, "tokens_per_sec": 64.7730636191, "iter": 1290, "memory": 16133, "step": 1290} +{"base_lr": 1.9549302475462224e-05, "lr": 1.9549302475462224e-05, "data_time": 0.009075641632080078, "loss": 1.4140625, "time": 0.9850924015045166, "tflops": 5.095083912235305, "tokens_per_sec": 84.25605544530758, "iter": 1300, "memory": 16133, "step": 1300} +{"base_lr": 1.953857864848072e-05, "lr": 1.953857864848072e-05, "data_time": 0.00930476188659668, "loss": 0.12890625, "time": 0.9715664386749268, "tflops": 4.667591592583451, "tokens_per_sec": 77.19492668171179, "iter": 1310, "memory": 16133, "step": 1310} +{"base_lr": 1.9527731748156227e-05, "lr": 1.9527731748156227e-05, "data_time": 0.009043693542480469, "loss": 0.2041015625, "time": 1.0203123092651367, "tflops": 2.784244276887392, "tokens_per_sec": 46.064327140976, "iter": 1320, "memory": 16133, "step": 1320} +{"base_lr": 1.9516761914442945e-05, "lr": 1.9516761914442945e-05, "data_time": 0.008829116821289062, "loss": 0.068359375, "time": 1.2515339851379395, "tflops": 3.671815079675875, 
"tokens_per_sec": 60.725478414845306, "iter": 1330, "memory": 16133, "step": 1330} +{"base_lr": 1.950566928888126e-05, "lr": 1.950566928888126e-05, "data_time": 0.009313821792602539, "loss": 0.1904296875, "time": 1.008615493774414, "tflops": 2.8165328861630132, "tokens_per_sec": 46.598530649247955, "iter": 1340, "memory": 16133, "step": 1340} +{"base_lr": 1.949445401459592e-05, "lr": 1.949445401459592e-05, "data_time": 0.00870370864868164, "loss": 0.052490234375, "time": 1.0140647888183594, "tflops": 4.710733575902663, "tokens_per_sec": 77.9042925767859, "iter": 1350, "memory": 16133, "step": 1350} +{"base_lr": 1.9483116236294146e-05, "lr": 1.9483116236294146e-05, "data_time": 0.008784294128417969, "loss": 0.2060546875, "time": 1.03938627243042, "tflops": 2.6167763371720487, "tokens_per_sec": 43.29478000005927, "iter": 1360, "memory": 16133, "step": 1360} +{"base_lr": 1.9471656100263793e-05, "lr": 1.9471656100263793e-05, "data_time": 0.009187459945678711, "loss": 1.3515625, "time": 1.016573429107666, "tflops": 4.877750142790672, "tokens_per_sec": 80.66313524631249, "iter": 1370, "memory": 16133, "step": 1370} +{"base_lr": 1.946007375437147e-05, "lr": 1.946007375437147e-05, "data_time": 0.009354114532470703, "loss": 0.054931640625, "time": 1.2237787246704102, "tflops": 3.3100142316656447, "tokens_per_sec": 54.74845954524155, "iter": 1380, "memory": 16133, "step": 1380} +{"base_lr": 1.9448369348060627e-05, "lr": 1.9448369348060627e-05, "data_time": 0.009099960327148438, "loss": 1.75, "time": 1.001380443572998, "tflops": 3.984712756081639, "tokens_per_sec": 65.90901632194982, "iter": 1390, "memory": 16133, "step": 1390} +{"base_lr": 1.943654303234961e-05, "lr": 1.943654303234961e-05, "data_time": 0.008838415145874023, "loss": 1.5078125, "time": 1.3178200721740723, "tflops": 5.2333243236833855, "tokens_per_sec": 86.50649842648254, "iter": 1400, "memory": 16133, "step": 1400} +{"base_lr": 1.9424594959829723e-05, "lr": 1.9424594959829723e-05, "data_time": 
0.008842706680297852, "loss": 0.1943359375, "time": 1.0276379585266113, "tflops": 4.530709424380318, "tokens_per_sec": 74.92911230169501, "iter": 1410, "memory": 16133, "step": 1410} +{"base_lr": 1.9412525284663273e-05, "lr": 1.9412525284663273e-05, "data_time": 0.008708000183105469, "loss": 0.05859375, "time": 0.9853134155273438, "tflops": 5.9542664878526, "tokens_per_sec": 98.44583304286662, "iter": 1420, "memory": 16133, "step": 1420} +{"base_lr": 1.9400334162581558e-05, "lr": 1.9400334162581558e-05, "data_time": 0.009298086166381836, "loss": 1.3203125, "time": 1.1812093257904053, "tflops": 4.249143938660664, "tokens_per_sec": 70.26696978064437, "iter": 1430, "memory": 16133, "step": 1430} +{"base_lr": 1.9388021750882875e-05, "lr": 1.9388021750882875e-05, "data_time": 0.008642435073852539, "loss": 0.2216796875, "time": 1.021488904953003, "tflops": 2.8402459923993426, "tokens_per_sec": 46.99023138402214, "iter": 1440, "memory": 16133, "step": 1440} +{"base_lr": 1.937558820843048e-05, "lr": 1.937558820843048e-05, "data_time": 0.008488893508911133, "loss": 0.1396484375, "time": 1.00783109664917, "tflops": 4.439584115576362, "tokens_per_sec": 73.42500171503069, "iter": 1450, "memory": 16133, "step": 1450} +{"base_lr": 1.936303369565055e-05, "lr": 1.936303369565055e-05, "data_time": 0.00916147232055664, "loss": 0.2060546875, "time": 0.9994895458221436, "tflops": 2.7212304663377314, "tokens_per_sec": 45.02298216930286, "iter": 1460, "memory": 16133, "step": 1460} +{"base_lr": 1.9350358374530084e-05, "lr": 1.9350358374530084e-05, "data_time": 0.008870363235473633, "loss": 1.5234375, "time": 0.9840962886810303, "tflops": 4.485158791206604, "tokens_per_sec": 74.17973306023401, "iter": 1470, "memory": 16133, "step": 1470} +{"base_lr": 1.9337562408614862e-05, "lr": 1.9337562408614862e-05, "data_time": 0.00956869125366211, "loss": 1.390625, "time": 1.017620325088501, "tflops": 4.872732065916515, "tokens_per_sec": 80.58015153420604, "iter": 1480, "memory": 16133, "step": 
1480} +{"base_lr": 1.9324645963007276e-05, "lr": 1.9324645963007276e-05, "data_time": 0.008959770202636719, "loss": 1.3203125, "time": 1.3244366645812988, "tflops": 3.698218017986349, "tokens_per_sec": 61.15807736683716, "iter": 1490, "memory": 16133, "step": 1490} +{"base_lr": 1.931160920436425e-05, "lr": 1.931160920436425e-05, "data_time": 0.009025335311889648, "loss": 1.796875, "time": 0.9959440231323242, "tflops": 4.188742389946086, "tokens_per_sec": 69.28100214198803, "iter": 1500, "memory": 16133, "step": 1500} +{"base_lr": 1.9298452300895065e-05, "lr": 1.9298452300895065e-05, "data_time": 0.009225845336914062, "loss": 0.0712890625, "time": 1.2883358001708984, "tflops": 4.083790526142332, "tokens_per_sec": 67.52897807263592, "iter": 1510, "memory": 16133, "step": 1510} +{"base_lr": 1.9285175422359204e-05, "lr": 1.9285175422359204e-05, "data_time": 0.008836746215820312, "loss": 0.158203125, "time": 1.0168769359588623, "tflops": 3.5669780776532134, "tokens_per_sec": 59.00419006294414, "iter": 1520, "memory": 16133, "step": 1520} +{"base_lr": 1.927177874006414e-05, "lr": 1.927177874006414e-05, "data_time": 0.010029315948486328, "loss": 2.203125, "time": 1.3005573749542236, "tflops": 3.53340917352317, "tokens_per_sec": 58.4364838211129, "iter": 1530, "memory": 16133, "step": 1530} +{"base_lr": 1.9258262426863132e-05, "lr": 1.9258262426863132e-05, "data_time": 0.008970260620117188, "loss": 0.06689453125, "time": 0.979363203048706, "tflops": 4.939456041288984, "tokens_per_sec": 81.68573186217588, "iter": 1540, "memory": 16133, "step": 1540} +{"base_lr": 1.924462665715302e-05, "lr": 1.924462665715302e-05, "data_time": 0.008864402770996094, "loss": 0.353515625, "time": 0.9957895278930664, "tflops": 2.6706096865028566, "tokens_per_sec": 44.186044106180624, "iter": 1550, "memory": 16133, "step": 1550} +{"base_lr": 1.9230871606871927e-05, "lr": 1.9230871606871927e-05, "data_time": 0.00916910171508789, "loss": 1.7265625, "time": 0.9844110012054443, "tflops": 
4.668173510584612, "tokens_per_sec": 77.20352566850457, "iter": 1560, "memory": 16133, "step": 1560} +{"base_lr": 1.921699745349705e-05, "lr": 1.921699745349705e-05, "data_time": 0.008910417556762695, "loss": 1.4375, "time": 1.3103103637695312, "tflops": 4.06151013418069, "tokens_per_sec": 67.15966112545458, "iter": 1570, "memory": 16133, "step": 1570} +{"base_lr": 1.92030043760423e-05, "lr": 1.92030043760423e-05, "data_time": 0.009324312210083008, "loss": 0.04638671875, "time": 0.9729022979736328, "tflops": 4.287946442717492, "tokens_per_sec": 70.92181829937367, "iter": 1580, "memory": 16133, "step": 1580} +{"base_lr": 1.9188892555056045e-05, "lr": 1.9188892555056045e-05, "data_time": 0.008805513381958008, "loss": 0.1181640625, "time": 0.9898886680603027, "tflops": 4.397774274391839, "tokens_per_sec": 72.73545230193616, "iter": 1590, "memory": 16133, "step": 1590} +{"base_lr": 1.9174662172618773e-05, "lr": 1.9174662172618773e-05, "data_time": 0.00888967514038086, "loss": 2.03125, "time": 1.251549243927002, "tflops": 4.107075933910037, "tokens_per_sec": 67.91582545583783, "iter": 1600, "memory": 16133, "step": 1600} +{"base_lr": 1.916031341234073e-05, "lr": 1.916031341234073e-05, "data_time": 0.009091615676879883, "loss": 0.12255859375, "time": 1.0005128383636475, "tflops": 3.504381505695518, "tokens_per_sec": 57.97027062121644, "iter": 1610, "memory": 16133, "step": 1610} +{"base_lr": 1.914584645935954e-05, "lr": 1.914584645935954e-05, "data_time": 0.009047508239746094, "loss": 0.07373046875, "time": 0.9965634346008301, "tflops": 4.489780351673946, "tokens_per_sec": 74.25518279181715, "iter": 1620, "memory": 16133, "step": 1620} +{"base_lr": 1.9131261500337853e-05, "lr": 1.9131261500337853e-05, "data_time": 0.009209394454956055, "loss": 0.119140625, "time": 1.0046005249023438, "tflops": 3.851476295738815, "tokens_per_sec": 63.70691475216745, "iter": 1630, "memory": 16133, "step": 1630} +{"base_lr": 1.9116558723460897e-05, "lr": 1.9116558723460897e-05, "data_time": 
0.008789300918579102, "loss": 1.546875, "time": 1.3550353050231934, "tflops": 2.721463904775646, "tokens_per_sec": 45.01727724275854, "iter": 1640, "memory": 16133, "step": 1640} +{"base_lr": 1.910173831843408e-05, "lr": 1.910173831843408e-05, "data_time": 0.00905752182006836, "loss": 0.2392578125, "time": 0.9816780090332031, "tflops": 3.7565063476852476, "tokens_per_sec": 62.13850105495708, "iter": 1650, "memory": 16133, "step": 1650} +{"base_lr": 1.9086800476480517e-05, "lr": 1.9086800476480517e-05, "data_time": 0.00870060920715332, "loss": 0.20703125, "time": 0.9887776374816895, "tflops": 4.157899625483769, "tokens_per_sec": 68.77178186707371, "iter": 1660, "memory": 16133, "step": 1660} +{"base_lr": 1.90717453903386e-05, "lr": 1.90717453903386e-05, "data_time": 0.00900411605834961, "loss": 0.203125, "time": 0.9985446929931641, "tflops": 2.90550817497856, "tokens_per_sec": 48.06995654452948, "iter": 1670, "memory": 16133, "step": 1670} +{"base_lr": 1.9056573254259453e-05, "lr": 1.9056573254259453e-05, "data_time": 0.008977413177490234, "loss": 0.162109375, "time": 1.0100758075714111, "tflops": 3.7107939660888247, "tokens_per_sec": 61.38153150010504, "iter": 1680, "memory": 16133, "step": 1680} +{"base_lr": 1.9041284264004476e-05, "lr": 1.9041284264004476e-05, "data_time": 0.010865926742553711, "loss": 1.59375, "time": 1.0095634460449219, "tflops": 6.35118927904244, "tokens_per_sec": 104.99587758962737, "iter": 1690, "memory": 16133, "step": 1690} +{"base_lr": 1.9025878616842803e-05, "lr": 1.9025878616842803e-05, "data_time": 0.00914311408996582, "loss": 1.7578125, "time": 1.3444130420684814, "tflops": 3.4181469649284333, "tokens_per_sec": 56.53024600461455, "iter": 1700, "memory": 16133, "step": 1700} +{"base_lr": 1.901035651154876e-05, "lr": 1.901035651154876e-05, "data_time": 0.008980989456176758, "loss": 0.2578125, "time": 1.265146017074585, "tflops": 1.9108278465343103, "tokens_per_sec": 31.61690386731877, "iter": 1710, "memory": 16133, "step": 1710} 
+{"base_lr": 1.8994718148399294e-05, "lr": 1.8994718148399294e-05, "data_time": 0.009119272232055664, "loss": 0.1396484375, "time": 0.9852135181427002, "tflops": 4.234364298586006, "tokens_per_sec": 70.03557983045852, "iter": 1720, "memory": 16133, "step": 1720} +{"base_lr": 1.897896372917139e-05, "lr": 1.897896372917139e-05, "data_time": 0.009035587310791016, "loss": 0.1455078125, "time": 0.9875144958496094, "tflops": 3.8568429864873357, "tokens_per_sec": 63.79653186329591, "iter": 1730, "memory": 16132, "step": 1730} +{"base_lr": 1.8963093457139462e-05, "lr": 1.8963093457139462e-05, "data_time": 0.008828163146972656, "loss": 1.734375, "time": 0.9862356185913086, "tflops": 5.273334035677221, "tokens_per_sec": 87.20025760451752, "iter": 1740, "memory": 16133, "step": 1740} +{"base_lr": 1.8947107537072736e-05, "lr": 1.8947107537072736e-05, "data_time": 0.008733272552490234, "loss": 0.1943359375, "time": 1.2328109741210938, "tflops": 2.353385741591124, "tokens_per_sec": 38.93540940790346, "iter": 1750, "memory": 16133, "step": 1750} +{"base_lr": 1.8931006175232633e-05, "lr": 1.8931006175232633e-05, "data_time": 0.009323358535766602, "loss": 0.1748046875, "time": 1.0296733379364014, "tflops": 4.93328041373527, "tokens_per_sec": 81.5792707309148, "iter": 1760, "memory": 16133, "step": 1760} +{"base_lr": 1.8914789579370055e-05, "lr": 1.8914789579370055e-05, "data_time": 0.008675575256347656, "loss": 0.10791015625, "time": 1.3282709121704102, "tflops": 3.277423964509896, "tokens_per_sec": 54.2058094777495, "iter": 1770, "memory": 16133, "step": 1770} +{"base_lr": 1.889845795872275e-05, "lr": 1.889845795872275e-05, "data_time": 0.00889444351196289, "loss": 0.212890625, "time": 1.234731674194336, "tflops": 2.2027793242224045, "tokens_per_sec": 36.44516532656871, "iter": 1780, "memory": 16133, "step": 1780} +{"base_lr": 1.8882011524012582e-05, "lr": 1.8882011524012582e-05, "data_time": 0.008870124816894531, "loss": 1.21875, "time": 0.9832320213317871, "tflops": 
5.474176589424018, "tokens_per_sec": 90.517800548602, "iter": 1790, "memory": 16133, "step": 1790} +{"base_lr": 1.8865450487442844e-05, "lr": 1.8865450487442844e-05, "data_time": 0.009136199951171875, "loss": 1.4296875, "time": 1.0184617042541504, "tflops": 4.630962748693011, "tokens_per_sec": 76.58609025171458, "iter": 1800, "memory": 16133, "step": 1800} +{"base_lr": 1.8848775062695478e-05, "lr": 1.8848775062695478e-05, "data_time": 0.008932352066040039, "loss": 1.6015625, "time": 0.9700255393981934, "tflops": 5.174222990133545, "tokens_per_sec": 85.56475745103359, "iter": 1810, "memory": 16133, "step": 1810} +{"base_lr": 1.8831985464928373e-05, "lr": 1.8831985464928373e-05, "data_time": 0.009530067443847656, "loss": 0.74609375, "time": 1.0016024112701416, "tflops": 4.829782192497583, "tokens_per_sec": 79.87201218742211, "iter": 1820, "memory": 16133, "step": 1820} +{"base_lr": 1.8815081910772526e-05, "lr": 1.8815081910772526e-05, "data_time": 0.009420394897460938, "loss": 0.53515625, "time": 1.0017173290252686, "tflops": 3.922966440822098, "tokens_per_sec": 64.88856498388026, "iter": 1830, "memory": 16133, "step": 1830} +{"base_lr": 1.8798064618329295e-05, "lr": 1.8798064618329295e-05, "data_time": 0.00950932502746582, "loss": 1.6484375, "time": 1.2904047966003418, "tflops": 3.232902542442132, "tokens_per_sec": 53.47159293094048, "iter": 1840, "memory": 16133, "step": 1840} +{"base_lr": 1.8780933807167557e-05, "lr": 1.8780933807167557e-05, "data_time": 0.009421348571777344, "loss": 1.71875, "time": 1.01251220703125, "tflops": 5.256073738582088, "tokens_per_sec": 86.91253240090276, "iter": 1850, "memory": 16133, "step": 1850} +{"base_lr": 1.8763689698320902e-05, "lr": 1.8763689698320902e-05, "data_time": 0.008850574493408203, "loss": 1.3515625, "time": 0.9921727180480957, "tflops": 7.012053798294158, "tokens_per_sec": 115.90723863696225, "iter": 1860, "memory": 16133, "step": 1860} +{"base_lr": 1.8746332514284753e-05, "lr": 1.8746332514284753e-05, "data_time": 
0.008228778839111328, "loss": 0.1806640625, "time": 1.0279145240783691, "tflops": 3.1755557252734747, "tokens_per_sec": 52.5335509276552, "iter": 1870, "memory": 16133, "step": 1870} +{"base_lr": 1.8728862479013512e-05, "lr": 1.8728862479013512e-05, "data_time": 0.008748769760131836, "loss": 0.171875, "time": 0.9852142333984375, "tflops": 3.0062115778134584, "tokens_per_sec": 49.73537565624453, "iter": 1880, "memory": 16133, "step": 1880} +{"base_lr": 1.8711279817917668e-05, "lr": 1.8711279817917668e-05, "data_time": 0.008911371231079102, "loss": 0.125, "time": 1.0064077377319336, "tflops": 4.145191646786643, "tokens_per_sec": 68.56068113648611, "iter": 1890, "memory": 16133, "step": 1890} +{"base_lr": 1.8693584757860872e-05, "lr": 1.8693584757860872e-05, "data_time": 0.009386539459228516, "loss": 0.1328125, "time": 1.3082497119903564, "tflops": 3.4663683081551953, "tokens_per_sec": 57.328504881410225, "iter": 1900, "memory": 16133, "step": 1900} +{"base_lr": 1.8675777527157045e-05, "lr": 1.8675777527157045e-05, "data_time": 0.009516239166259766, "loss": 1.1796875, "time": 0.9849696159362793, "tflops": 5.341579527024749, "tokens_per_sec": 88.32759771702437, "iter": 1910, "memory": 16133, "step": 1910} +{"base_lr": 1.8657858355567394e-05, "lr": 1.8657858355567394e-05, "data_time": 0.009712934494018555, "loss": 0.1904296875, "time": 1.2618374824523926, "tflops": 2.5868623316573314, "tokens_per_sec": 42.794734465335196, "iter": 1920, "memory": 16133, "step": 1920} +{"base_lr": 1.8639827474297472e-05, "lr": 1.8639827474297472e-05, "data_time": 0.008854866027832031, "loss": 0.19921875, "time": 0.9910397529602051, "tflops": 3.9041775032645565, "tokens_per_sec": 64.5786405729633, "iter": 1930, "memory": 16133, "step": 1930} +{"base_lr": 1.8621685115994187e-05, "lr": 1.8621685115994187e-05, "data_time": 0.008912801742553711, "loss": 1.1640625, "time": 0.9983706474304199, "tflops": 3.633097334714218, "tokens_per_sec": 60.09792070146125, "iter": 1940, "memory": 16133, 
"step": 1940} +{"base_lr": 1.8603431514742813e-05, "lr": 1.8603431514742813e-05, "data_time": 0.008697986602783203, "loss": 1.3671875, "time": 1.0254595279693604, "tflops": 4.894516370538878, "tokens_per_sec": 80.93932304113225, "iter": 1950, "memory": 16133, "step": 1950} +{"base_lr": 1.8585066906063937e-05, "lr": 1.8585066906063937e-05, "data_time": 0.009708881378173828, "loss": 1.4296875, "time": 1.0016229152679443, "tflops": 5.010996025014384, "tokens_per_sec": 82.86551628834668, "iter": 1960, "memory": 16133, "step": 1960} +{"base_lr": 1.8566591526910446e-05, "lr": 1.8566591526910446e-05, "data_time": 0.00975942611694336, "loss": 0.08544921875, "time": 1.020911455154419, "tflops": 4.086302417947608, "tokens_per_sec": 67.58666449628166, "iter": 1970, "memory": 16133, "step": 1970} +{"base_lr": 1.8548005615664474e-05, "lr": 1.8548005615664474e-05, "data_time": 0.010078907012939453, "loss": 1.859375, "time": 0.9981284141540527, "tflops": 3.8764502177081055, "tokens_per_sec": 64.1200060957868, "iter": 1980, "memory": 16133, "step": 1980} +{"base_lr": 1.8529309412134306e-05, "lr": 1.8529309412134306e-05, "data_time": 0.009151935577392578, "loss": 1.3125, "time": 1.306913137435913, "tflops": 3.284676846465031, "tokens_per_sec": 54.3264873281812, "iter": 1990, "memory": 16133, "step": 1990} +{"base_lr": 1.8510503157551303e-05, "lr": 1.8510503157551303e-05, "data_time": 0.009372949600219727, "loss": 0.140625, "time": 1.0017821788787842, "tflops": 8.094232302334712, "tokens_per_sec": 133.76161287860188, "iter": 2000, "memory": 16133, "step": 2000} +{"base_lr": 1.8491587094566775e-05, "lr": 1.8491587094566775e-05, "data_time": 0.008806943893432617, "loss": 0.1162109375, "time": 1.0125634670257568, "tflops": 4.0602276325927935, "tokens_per_sec": 67.15628423734461, "iter": 2010, "memory": 16133, "step": 2010} +{"base_lr": 1.847256146724887e-05, "lr": 1.847256146724887e-05, "data_time": 0.009635686874389648, "loss": 1.8046875, "time": 0.9832067489624023, "tflops": 
4.797015702291075, "tokens_per_sec": 79.33224632788132, "iter": 2020, "memory": 16133, "step": 2020} +{"base_lr": 1.845342652107938e-05, "lr": 1.845342652107938e-05, "data_time": 0.008751630783081055, "loss": 1.59375, "time": 1.2931995391845703, "tflops": 3.03874486951553, "tokens_per_sec": 50.262931612963314, "iter": 2030, "memory": 16133, "step": 2030} +{"base_lr": 1.843418250295062e-05, "lr": 1.843418250295062e-05, "data_time": 0.009129762649536133, "loss": 1.671875, "time": 1.360168218612671, "tflops": 4.446881945649959, "tokens_per_sec": 73.52031802501851, "iter": 2040, "memory": 16133, "step": 2040} +{"base_lr": 1.841482966116225e-05, "lr": 1.841482966116225e-05, "data_time": 0.00910639762878418, "loss": 0.365234375, "time": 1.2489264011383057, "tflops": 3.7764100503228324, "tokens_per_sec": 62.45364012550796, "iter": 2050, "memory": 16133, "step": 2050} +{"base_lr": 1.839536824541802e-05, "lr": 1.839536824541802e-05, "data_time": 0.009278059005737305, "loss": 0.1044921875, "time": 0.9893813133239746, "tflops": 4.705899455392991, "tokens_per_sec": 77.82641430858357, "iter": 2060, "memory": 16133, "step": 2060} +{"base_lr": 1.8375798506822576e-05, "lr": 1.8375798506822576e-05, "data_time": 0.009011030197143555, "loss": 0.09521484375, "time": 0.9995815753936768, "tflops": 4.173499242485354, "tokens_per_sec": 69.02888338328556, "iter": 2070, "memory": 16133, "step": 2070} +{"base_lr": 1.8356120697878238e-05, "lr": 1.8356120697878238e-05, "data_time": 0.008310794830322266, "loss": 0.359375, "time": 0.9764566421508789, "tflops": 2.909293239535504, "tokens_per_sec": 48.133217565526664, "iter": 2080, "memory": 16133, "step": 2080} +{"base_lr": 1.833633507248171e-05, "lr": 1.833633507248171e-05, "data_time": 0.009128093719482422, "loss": 1.3828125, "time": 1.0164463520050049, "tflops": 4.282869342115836, "tokens_per_sec": 70.83502228908057, "iter": 2090, "memory": 16133, "step": 2090} +{"base_lr": 1.8316441885920828e-05, "lr": 1.8316441885920828e-05, "data_time": 
0.009154558181762695, "loss": 1.0390625, "time": 1.35164213180542, "tflops": 2.9521227056360155, "tokens_per_sec": 48.82949298997762, "iter": 2100, "memory": 16133, "step": 2100} +{"base_lr": 1.829644139487125e-05, "lr": 1.829644139487125e-05, "data_time": 0.009849309921264648, "loss": 0.2109375, "time": 1.004249095916748, "tflops": 2.889004112993123, "tokens_per_sec": 47.796906360303446, "iter": 2110, "memory": 16133, "step": 2110} +{"base_lr": 1.8276333857393156e-05, "lr": 1.8276333857393156e-05, "data_time": 0.00888824462890625, "loss": 1.5, "time": 0.9867494106292725, "tflops": 5.147879751301228, "tokens_per_sec": 85.12799612045998, "iter": 2120, "memory": 16133, "step": 2120} +{"base_lr": 1.8256119532927907e-05, "lr": 1.8256119532927907e-05, "data_time": 0.008811712265014648, "loss": 1.03125, "time": 1.301722764968872, "tflops": 3.99528224801413, "tokens_per_sec": 66.06629484734442, "iter": 2130, "memory": 16133, "step": 2130} +{"base_lr": 1.823579868229471e-05, "lr": 1.823579868229471e-05, "data_time": 0.009162425994873047, "loss": 0.020263671875, "time": 1.042445182800293, "tflops": 4.698621680497179, "tokens_per_sec": 77.7019274839317, "iter": 2140, "memory": 16133, "step": 2140} +{"base_lr": 1.821537156768724e-05, "lr": 1.821537156768724e-05, "data_time": 0.008759021759033203, "loss": 1.2734375, "time": 1.0112552642822266, "tflops": 4.484402208787808, "tokens_per_sec": 74.16525050493524, "iter": 2150, "memory": 16133, "step": 2150} +{"base_lr": 1.8194838452670265e-05, "lr": 1.8194838452670265e-05, "data_time": 0.010076045989990234, "loss": 1.5, "time": 1.3205020427703857, "tflops": 3.388371076262416, "tokens_per_sec": 56.039292332099315, "iter": 2160, "memory": 16133, "step": 2160} +{"base_lr": 1.8174199602176248e-05, "lr": 1.8174199602176248e-05, "data_time": 0.007833003997802734, "loss": 0.00531005859375, "time": 0.980004072189331, "tflops": 3.8246608543724996, "tokens_per_sec": 63.26504323745167, "iter": 2170, "memory": 16133, "step": 2170} +{"base_lr": 
1.8153455282501915e-05, "lr": 1.8153455282501915e-05, "data_time": 0.009055852890014648, "loss": 0.498046875, "time": 1.0288655757904053, "tflops": 2.8198822439406914, "tokens_per_sec": 46.65332491378022, "iter": 2180, "memory": 16133, "step": 2180} +{"base_lr": 1.8132605761304844e-05, "lr": 1.8132605761304844e-05, "data_time": 0.009076833724975586, "loss": 0.005126953125, "time": 1.2224373817443848, "tflops": 3.4126516498993262, "tokens_per_sec": 56.444608967604076, "iter": 2190, "memory": 16133, "step": 2190} +{"base_lr": 1.8111651307599985e-05, "lr": 1.8111651307599985e-05, "data_time": 0.008786678314208984, "loss": 0.4765625, "time": 1.0142147541046143, "tflops": 3.218450371401406, "tokens_per_sec": 53.24316155075058, "iter": 2200, "memory": 16133, "step": 2200} +{"base_lr": 1.809059219175621e-05, "lr": 1.809059219175621e-05, "data_time": 0.009404897689819336, "loss": 1.5, "time": 1.0102522373199463, "tflops": 5.327764210080213, "tokens_per_sec": 88.09681059060664, "iter": 2210, "memory": 16133, "step": 2210} +{"base_lr": 1.8069428685492783e-05, "lr": 1.8069428685492783e-05, "data_time": 0.009118318557739258, "loss": 1.15625, "time": 1.0003910064697266, "tflops": 4.412102959769219, "tokens_per_sec": 72.97146768395716, "iter": 2220, "memory": 16133, "step": 2220} +{"base_lr": 1.8048161061875924e-05, "lr": 1.8048161061875924e-05, "data_time": 0.009263992309570312, "loss": 0.00168609619140625, "time": 1.0046331882476807, "tflops": 5.056240795157376, "tokens_per_sec": 83.61260705156712, "iter": 2230, "memory": 16133, "step": 2230} +{"base_lr": 1.8026789595315218e-05, "lr": 1.8026789595315218e-05, "data_time": 0.009464025497436523, "loss": 0.056396484375, "time": 1.2655162811279297, "tflops": 3.3921233467265477, "tokens_per_sec": 56.10358480466407, "iter": 2240, "memory": 16133, "step": 2240} +{"base_lr": 1.800531456156013e-05, "lr": 1.800531456156013e-05, "data_time": 0.009188413619995117, "loss": 0.859375, "time": 1.3102881908416748, "tflops": 3.045294504740829, 
"tokens_per_sec": 50.37059820981364, "iter": 2250, "memory": 16133, "step": 2250} +{"base_lr": 1.798373623769641e-05, "lr": 1.798373623769641e-05, "data_time": 0.009014368057250977, "loss": 0.263671875, "time": 1.0018773078918457, "tflops": 3.1373303333547287, "tokens_per_sec": 51.902562908992024, "iter": 2260, "memory": 16132, "step": 2260} +{"base_lr": 1.7962054902142516e-05, "lr": 1.7962054902142516e-05, "data_time": 0.009068012237548828, "loss": 0.006866455078125, "time": 1.011911392211914, "tflops": 3.9432439024863037, "tokens_per_sec": 65.22310205013788, "iter": 2270, "memory": 16133, "step": 2270} +{"base_lr": 1.794027083464605e-05, "lr": 1.794027083464605e-05, "data_time": 0.009247779846191406, "loss": 0.1474609375, "time": 1.0081837177276611, "tflops": 5.038434187407818, "tokens_per_sec": 83.31814779675646, "iter": 2280, "memory": 16133, "step": 2280} +{"base_lr": 1.7918384316280128e-05, "lr": 1.7918384316280128e-05, "data_time": 0.008588075637817383, "loss": 0.263671875, "time": 0.9813225269317627, "tflops": 2.771608037350554, "tokens_per_sec": 45.856483230495805, "iter": 2290, "memory": 16133, "step": 2290} +{"base_lr": 1.789639562943975e-05, "lr": 1.789639562943975e-05, "data_time": 0.008626937866210938, "loss": 0.17578125, "time": 1.0150651931762695, "tflops": 2.917805137029028, "tokens_per_sec": 48.27276152246382, "iter": 2300, "memory": 16133, "step": 2300} +{"base_lr": 1.7874305057838177e-05, "lr": 1.7874305057838177e-05, "data_time": 0.009023189544677734, "loss": 0.1240234375, "time": 1.0141160488128662, "tflops": 3.1591149367206377, "tokens_per_sec": 52.26226333957542, "iter": 2310, "memory": 16133, "step": 2310} +{"base_lr": 1.785211288650325e-05, "lr": 1.785211288650325e-05, "data_time": 0.008720159530639648, "loss": 0.2890625, "time": 1.0096406936645508, "tflops": 2.8735764978358382, "tokens_per_sec": 47.54166536783854, "iter": 2320, "memory": 16133, "step": 2320} +{"base_lr": 1.7829819401773726e-05, "lr": 1.7829819401773726e-05, "data_time": 
0.00902104377746582, "loss": 0.2373046875, "time": 0.9972794055938721, "tflops": 2.969842171066717, "tokens_per_sec": 49.13367279531031, "iter": 2330, "memory": 16133, "step": 2330} +{"base_lr": 1.7807424891295573e-05, "lr": 1.7807424891295573e-05, "data_time": 0.008963823318481445, "loss": 1.4296875, "time": 0.9896371364593506, "tflops": 4.765846025388077, "tokens_per_sec": 78.81676740525697, "iter": 2340, "memory": 16133, "step": 2340} +{"base_lr": 1.7784929644018248e-05, "lr": 1.7784929644018248e-05, "data_time": 0.008906364440917969, "loss": 0.51953125, "time": 1.0156605243682861, "tflops": 2.9160948604468224, "tokens_per_sec": 48.24446635890319, "iter": 2350, "memory": 16133, "step": 2350} +{"base_lr": 1.776233395019101e-05, "lr": 1.776233395019101e-05, "data_time": 0.009368419647216797, "loss": 0.07666015625, "time": 0.9980580806732178, "tflops": 2.7857289132931142, "tokens_per_sec": 46.08950209483364, "iter": 2360, "memory": 16133, "step": 2360} +{"base_lr": 1.7739638101359147e-05, "lr": 1.7739638101359147e-05, "data_time": 0.009354591369628906, "loss": 0.275390625, "time": 0.9994604587554932, "tflops": 3.689670401676651, "tokens_per_sec": 61.032929782829896, "iter": 2370, "memory": 16133, "step": 2370} +{"base_lr": 1.7716842390360197e-05, "lr": 1.7716842390360197e-05, "data_time": 0.009092569351196289, "loss": 0.004730224609375, "time": 1.0271399021148682, "tflops": 3.766960177850757, "tokens_per_sec": 62.308941428682196, "iter": 2380, "memory": 16133, "step": 2380} +{"base_lr": 1.7693947111320203e-05, "lr": 1.7693947111320203e-05, "data_time": 0.00919651985168457, "loss": 0.004180908203125, "time": 1.0134422779083252, "tflops": 4.116418900855501, "tokens_per_sec": 68.08478539334587, "iter": 2390, "memory": 16133, "step": 2390} +{"base_lr": 1.7670952559649897e-05, "lr": 1.7670952559649897e-05, "data_time": 0.009264945983886719, "loss": 0.03564453125, "time": 1.0119457244873047, "tflops": 4.481342458453572, "tokens_per_sec": 74.11464684820335, "iter": 2400, 
"memory": 16133, "step": 2400} +{"base_lr": 1.7647859032040907e-05, "lr": 1.7647859032040907e-05, "data_time": 0.008965492248535156, "loss": 0.1865234375, "time": 0.9959022998809814, "tflops": 3.9458724670338508, "tokens_per_sec": 65.26744642290993, "iter": 2410, "memory": 16133, "step": 2410} +{"base_lr": 1.7624666826461906e-05, "lr": 1.7624666826461906e-05, "data_time": 0.008770227432250977, "loss": 1.5078125, "time": 1.0010662078857422, "tflops": 4.590506924672769, "tokens_per_sec": 75.9190545053324, "iter": 2420, "memory": 16133, "step": 2420} +{"base_lr": 1.760137624215477e-05, "lr": 1.760137624215477e-05, "data_time": 0.008911848068237305, "loss": 0.236328125, "time": 0.9800698757171631, "tflops": 4.997659511829058, "tokens_per_sec": 82.64716833648811, "iter": 2430, "memory": 16133, "step": 2430} +{"base_lr": 1.7577987579630746e-05, "lr": 1.7577987579630746e-05, "data_time": 0.008982419967651367, "loss": 0.66015625, "time": 1.0140538215637207, "tflops": 7.51809155099508, "tokens_per_sec": 124.25375983059514, "iter": 2440, "memory": 16133, "step": 2440} +{"base_lr": 1.755450114066654e-05, "lr": 1.755450114066654e-05, "data_time": 0.008801698684692383, "loss": 0.93359375, "time": 1.2994318008422852, "tflops": 3.024170612424416, "tokens_per_sec": 50.02186336967996, "iter": 2450, "memory": 16133, "step": 2450} +{"base_lr": 1.7530917228300436e-05, "lr": 1.7530917228300436e-05, "data_time": 0.008737325668334961, "loss": 1.59375, "time": 0.9831006526947021, "tflops": 5.721275152399855, "tokens_per_sec": 94.59865553439539, "iter": 2460, "memory": 16133, "step": 2460} +{"base_lr": 1.7507236146828404e-05, "lr": 1.7507236146828404e-05, "data_time": 0.009652376174926758, "loss": 0.0024261474609375, "time": 1.0070018768310547, "tflops": 4.924113155172383, "tokens_per_sec": 81.42983830175696, "iter": 2470, "memory": 16133, "step": 2470} +{"base_lr": 1.748345820180014e-05, "lr": 1.748345820180014e-05, "data_time": 0.009114742279052734, "loss": 1.0078125, "time": 
0.970496416091919, "tflops": 4.423290237548896, "tokens_per_sec": 73.15843605670997, "iter": 2480, "memory": 16133, "step": 2480} +{"base_lr": 1.7459583700015158e-05, "lr": 1.7459583700015158e-05, "data_time": 0.00874638557434082, "loss": 0.31640625, "time": 1.2873003482818604, "tflops": 2.253770670105107, "tokens_per_sec": 37.28733551888459, "iter": 2490, "memory": 16133, "step": 2490} +{"base_lr": 1.7435612949518786e-05, "lr": 1.7435612949518786e-05, "data_time": 0.009273290634155273, "loss": 0.1259765625, "time": 0.997673511505127, "tflops": 3.2718116842695726, "tokens_per_sec": 54.1259233378658, "iter": 2500, "memory": 16133, "step": 2500} +{"base_lr": 1.741154625959824e-05, "lr": 1.741154625959824e-05, "data_time": 0.009348154067993164, "loss": 1.5703125, "time": 1.011430025100708, "tflops": 5.501157652218687, "tokens_per_sec": 90.96032124491123, "iter": 2510, "memory": 16133, "step": 2510} +{"base_lr": 1.738738394077862e-05, "lr": 1.738738394077862e-05, "data_time": 0.008783817291259766, "loss": 1.140625, "time": 0.9921519756317139, "tflops": 4.936799660673375, "tokens_per_sec": 81.64071834694961, "iter": 2520, "memory": 16133, "step": 2520} +{"base_lr": 1.7363126304818878e-05, "lr": 1.7363126304818878e-05, "data_time": 0.0087890625, "loss": 0.002044677734375, "time": 1.0279045104980469, "tflops": 3.823023855642127, "tokens_per_sec": 63.23544583770971, "iter": 2530, "memory": 16133, "step": 2530} +{"base_lr": 1.733877366470781e-05, "lr": 1.733877366470781e-05, "data_time": 0.009189128875732422, "loss": 0.051025390625, "time": 1.303208351135254, "tflops": 2.4119090900633493, "tokens_per_sec": 39.90152453724052, "iter": 2540, "memory": 16133, "step": 2540} +{"base_lr": 1.731432633466005e-05, "lr": 1.731432633466005e-05, "data_time": 0.008774757385253906, "loss": 0.271484375, "time": 1.0078623294830322, "tflops": 3.238735843716952, "tokens_per_sec": 53.578746243690745, "iter": 2550, "memory": 16133, "step": 2550} +{"base_lr": 1.7289784630111958e-05, "lr": 
1.7289784630111958e-05, "data_time": 0.008786678314208984, "loss": 0.083984375, "time": 1.3403632640838623, "tflops": 1.9840630000611743, "tokens_per_sec": 32.826921759931366, "iter": 2560, "memory": 16133, "step": 2560} +{"base_lr": 1.726514886771759e-05, "lr": 1.726514886771759e-05, "data_time": 0.008836746215820312, "loss": 0.8828125, "time": 1.413316011428833, "tflops": 2.780484642627356, "tokens_per_sec": 45.991129707955665, "iter": 2570, "memory": 16133, "step": 2570} +{"base_lr": 1.724041936534461e-05, "lr": 1.724041936534461e-05, "data_time": 0.008799076080322266, "loss": 1.390625, "time": 0.9931337833404541, "tflops": 7.188300452892283, "tokens_per_sec": 118.8158151291384, "iter": 2580, "memory": 16133, "step": 2580} +{"base_lr": 1.7215596442070162e-05, "lr": 1.7215596442070162e-05, "data_time": 0.008746862411499023, "loss": 0.07373046875, "time": 0.9789056777954102, "tflops": 5.127285049847124, "tokens_per_sec": 84.78855714356382, "iter": 2590, "memory": 16133, "step": 2590} +{"base_lr": 1.719068041817679e-05, "lr": 1.719068041817679e-05, "data_time": 0.009588241577148438, "loss": 1.328125, "time": 0.9937088489532471, "tflops": 5.477393319268516, "tokens_per_sec": 90.56978821786042, "iter": 2600, "memory": 16133, "step": 2600} +{"base_lr": 1.7165671615148274e-05, "lr": 1.7165671615148274e-05, "data_time": 0.008587837219238281, "loss": 0.1923828125, "time": 0.9900681972503662, "tflops": 3.3580435787656167, "tokens_per_sec": 55.55172881291547, "iter": 2610, "memory": 16133, "step": 2610} +{"base_lr": 1.7140570355665507e-05, "lr": 1.7140570355665507e-05, "data_time": 0.009155511856079102, "loss": 1.4765625, "time": 0.9990236759185791, "tflops": 5.266435282816593, "tokens_per_sec": 87.08502320519925, "iter": 2620, "memory": 16133, "step": 2620} +{"base_lr": 1.7115376963602302e-05, "lr": 1.7115376963602302e-05, "data_time": 0.008867502212524414, "loss": 1.0078125, "time": 0.9888744354248047, "tflops": 6.912902751331066, "tokens_per_sec": 114.2713330953315, 
"iter": 2630, "memory": 16133, "step": 2630} +{"base_lr": 1.709009176402123e-05, "lr": 1.709009176402123e-05, "data_time": 0.008832931518554688, "loss": 0.94140625, "time": 0.9761953353881836, "tflops": 4.521459958437849, "tokens_per_sec": 74.78011557071906, "iter": 2640, "memory": 16133, "step": 2640} +{"base_lr": 1.706471508316945e-05, "lr": 1.706471508316945e-05, "data_time": 0.008678436279296875, "loss": 1.5078125, "time": 0.974231481552124, "tflops": 4.592697949710033, "tokens_per_sec": 75.95730727365624, "iter": 2650, "memory": 16133, "step": 2650} +{"base_lr": 1.7039247248474455e-05, "lr": 1.7039247248474455e-05, "data_time": 0.009337663650512695, "loss": 0.2734375, "time": 0.9974789619445801, "tflops": 2.90861249135226, "tokens_per_sec": 48.121315668027854, "iter": 2660, "memory": 16133, "step": 2660} +{"base_lr": 1.701368858853986e-05, "lr": 1.701368858853986e-05, "data_time": 0.008939266204833984, "loss": 0.107421875, "time": 0.984154462814331, "tflops": 4.054458500127468, "tokens_per_sec": 67.06264361307315, "iter": 2670, "memory": 16133, "step": 2670} +{"base_lr": 1.6988039433141218e-05, "lr": 1.6988039433141218e-05, "data_time": 0.008816719055175781, "loss": 1.0546875, "time": 0.9927268028259277, "tflops": 5.299840318856845, "tokens_per_sec": 87.63740411990025, "iter": 2680, "memory": 16133, "step": 2680} +{"base_lr": 1.6962300113221653e-05, "lr": 1.6962300113221653e-05, "data_time": 0.009046554565429688, "loss": 0.01495361328125, "time": 0.9669690132141113, "tflops": 4.189094934493913, "tokens_per_sec": 69.28867325047905, "iter": 2690, "memory": 16133, "step": 2690} +{"base_lr": 1.6936470960887695e-05, "lr": 1.6936470960887695e-05, "data_time": 0.008945703506469727, "loss": 0.259765625, "time": 1.0180788040161133, "tflops": 2.8497595246342353, "tokens_per_sec": 47.14762728641696, "iter": 2700, "memory": 16133, "step": 2700} +{"base_lr": 1.6910552309404933e-05, "lr": 1.6910552309404933e-05, "data_time": 0.00936126708984375, "loss": 0.0137939453125, 
"time": 1.0246753692626953, "tflops": 4.307538029090775, "tokens_per_sec": 71.24207548040886, "iter": 2710, "memory": 16133, "step": 2710} +{"base_lr": 1.6884544493193754e-05, "lr": 1.6884544493193754e-05, "data_time": 0.00883936882019043, "loss": 0.255859375, "time": 0.9940416812896729, "tflops": 2.9186701354504083, "tokens_per_sec": 48.28771358729783, "iter": 2720, "memory": 16133, "step": 2720} +{"base_lr": 1.6858447847825e-05, "lr": 1.6858447847825e-05, "data_time": 0.008939504623413086, "loss": 1.1171875, "time": 0.9999771118164062, "tflops": 4.353406560476261, "tokens_per_sec": 72.00164798686617, "iter": 2730, "memory": 16133, "step": 2730} +{"base_lr": 1.6832262710015645e-05, "lr": 1.6832262710015645e-05, "data_time": 0.008743762969970703, "loss": 0.10009765625, "time": 0.9706697463989258, "tflops": 3.7367783962492696, "tokens_per_sec": 61.81299069279882, "iter": 2740, "memory": 16133, "step": 2740} +{"base_lr": 1.6805989417624473e-05, "lr": 1.6805989417624473e-05, "data_time": 0.008753299713134766, "loss": 0.006988525390625, "time": 1.3318495750427246, "tflops": 3.6776341926446134, "tokens_per_sec": 60.817679051585664, "iter": 2750, "memory": 16133, "step": 2750} +{"base_lr": 1.6779628309647667e-05, "lr": 1.6779628309647667e-05, "data_time": 0.008638858795166016, "loss": 0.2431640625, "time": 0.9679028987884521, "tflops": 2.9974905253456785, "tokens_per_sec": 49.59175146601295, "iter": 2760, "memory": 16133, "step": 2760} +{"base_lr": 1.6753179726214505e-05, "lr": 1.6753179726214505e-05, "data_time": 0.009190559387207031, "loss": 1.4453125, "time": 0.993187427520752, "tflops": 4.139438392645138, "tokens_per_sec": 68.46643253396472, "iter": 2770, "memory": 16133, "step": 2770} +{"base_lr": 1.6726644008582904e-05, "lr": 1.6726644008582904e-05, "data_time": 0.014373779296875, "loss": 1.78125, "time": 0.9902448654174805, "tflops": 4.946307431314039, "tokens_per_sec": 81.79795001084824, "iter": 2780, "memory": 16133, "step": 2780} +{"base_lr": 
1.6700021499135056e-05, "lr": 1.6700021499135056e-05, "data_time": 0.009682655334472656, "loss": 0.05859375, "time": 1.0084917545318604, "tflops": 4.076620507899295, "tokens_per_sec": 67.42742287615233, "iter": 2790, "memory": 16133, "step": 2790} +{"base_lr": 1.6673312541372995e-05, "lr": 1.6673312541372995e-05, "data_time": 0.008786439895629883, "loss": 0.00107574462890625, "time": 1.0149304866790771, "tflops": 4.468163485396874, "tokens_per_sec": 73.89668650641414, "iter": 2800, "memory": 16133, "step": 2800} +{"base_lr": 1.66465174799142e-05, "lr": 1.66465174799142e-05, "data_time": 0.01021885871887207, "loss": 1.25, "time": 1.012080192565918, "tflops": 5.736955387451764, "tokens_per_sec": 94.8541436785925, "iter": 2810, "memory": 16133, "step": 2810} +{"base_lr": 1.6619636660487074e-05, "lr": 1.6619636660487074e-05, "data_time": 0.008965730667114258, "loss": 1.5703125, "time": 0.9998369216918945, "tflops": 4.5356150012160805, "tokens_per_sec": 75.01223286794831, "iter": 2820, "memory": 16133, "step": 2820} +{"base_lr": 1.6592670429926574e-05, "lr": 1.6592670429926574e-05, "data_time": 0.008708953857421875, "loss": 0.04638671875, "time": 1.0279772281646729, "tflops": 4.058215331438902, "tokens_per_sec": 67.1221094295288, "iter": 2830, "memory": 16133, "step": 2830} +{"base_lr": 1.656561913616965e-05, "lr": 1.656561913616965e-05, "data_time": 0.008490562438964844, "loss": 1.578125, "time": 0.9980182647705078, "tflops": 6.6674544708492505, "tokens_per_sec": 110.21842373314085, "iter": 2840, "memory": 16133, "step": 2840} +{"base_lr": 1.6538483128250825e-05, "lr": 1.6538483128250825e-05, "data_time": 0.009083271026611328, "loss": 0.09423828125, "time": 1.28802490234375, "tflops": 3.6147818066842032, "tokens_per_sec": 59.78145287395254, "iter": 2850, "memory": 16133, "step": 2850} +{"base_lr": 1.651126275629765e-05, "lr": 1.651126275629765e-05, "data_time": 0.009303092956542969, "loss": 0.1943359375, "time": 0.971623420715332, "tflops": 2.737032786788297, 
"tokens_per_sec": 45.28503436811031, "iter": 2860, "memory": 16133, "step": 2860} +{"base_lr": 1.6483958371526206e-05, "lr": 1.6483958371526206e-05, "data_time": 0.008953332901000977, "loss": 0.3671875, "time": 0.9972219467163086, "tflops": 3.0306660589939294, "tokens_per_sec": 50.13928961811542, "iter": 2870, "memory": 16133, "step": 2870} +{"base_lr": 1.645657032623656e-05, "lr": 1.645657032623656e-05, "data_time": 0.008953094482421875, "loss": 1.2890625, "time": 1.2781074047088623, "tflops": 3.832272247692585, "tokens_per_sec": 63.37495558003395, "iter": 2880, "memory": 16133, "step": 2880} +{"base_lr": 1.642909897380823e-05, "lr": 1.642909897380823e-05, "data_time": 0.008804559707641602, "loss": 1.703125, "time": 0.9786138534545898, "tflops": 3.8300941671699884, "tokens_per_sec": 63.354917551055905, "iter": 2890, "memory": 16133, "step": 2890} +{"base_lr": 1.6401544668695607e-05, "lr": 1.6401544668695607e-05, "data_time": 0.00880575180053711, "loss": 1.1484375, "time": 0.978473424911499, "tflops": 5.1295500922008594, "tokens_per_sec": 84.82601355005872, "iter": 2900, "memory": 16133, "step": 2900} +{"base_lr": 1.637390776642341e-05, "lr": 1.637390776642341e-05, "data_time": 0.00871586799621582, "loss": 1.234375, "time": 0.9933192729949951, "tflops": 5.113831422268679, "tokens_per_sec": 84.56495538100636, "iter": 2910, "memory": 16133, "step": 2910} +{"base_lr": 1.6346188623582078e-05, "lr": 1.6346188623582078e-05, "data_time": 0.008947372436523438, "loss": 0.380859375, "time": 1.329230785369873, "tflops": 3.8215089256864485, "tokens_per_sec": 63.194443677109746, "iter": 2920, "memory": 16133, "step": 2920} +{"base_lr": 1.631838759782318e-05, "lr": 1.631838759782318e-05, "data_time": 0.009073019027709961, "loss": 0.287109375, "time": 0.9708282947540283, "tflops": 5.1075882478715915, "tokens_per_sec": 84.46395767718253, "iter": 2930, "memory": 16133, "step": 2930} +{"base_lr": 1.6290505047854786e-05, "lr": 1.6290505047854786e-05, "data_time": 
0.008533716201782227, "loss": 1.453125, "time": 1.2384631633758545, "tflops": 4.248243864505666, "tokens_per_sec": 70.24835503607675, "iter": 2940, "memory": 16133, "step": 2940} +{"base_lr": 1.6262541333436852e-05, "lr": 1.6262541333436852e-05, "data_time": 0.008906364440917969, "loss": 1.0078125, "time": 1.3177824020385742, "tflops": 3.4412929887219827, "tokens_per_sec": 56.91379690904969, "iter": 2950, "memory": 16133, "step": 2950} +{"base_lr": 1.6234496815376572e-05, "lr": 1.6234496815376572e-05, "data_time": 0.009525775909423828, "loss": 1.5703125, "time": 1.020555019378662, "tflops": 4.502845287217622, "tokens_per_sec": 74.46928245592885, "iter": 2960, "memory": 16133, "step": 2960} +{"base_lr": 1.6206371855523726e-05, "lr": 1.6206371855523726e-05, "data_time": 0.008729696273803711, "loss": 0.003936767578125, "time": 0.9787065982818604, "tflops": 5.252041611713346, "tokens_per_sec": 86.8493174043502, "iter": 2970, "memory": 16133, "step": 2970} +{"base_lr": 1.617816681676601e-05, "lr": 1.617816681676601e-05, "data_time": 0.00886082649230957, "loss": 0.00567626953125, "time": 0.9916579723358154, "tflops": 4.817174047998781, "tokens_per_sec": 79.66456399663548, "iter": 2980, "memory": 16133, "step": 2980} +{"base_lr": 1.6149882063024367e-05, "lr": 1.6149882063024367e-05, "data_time": 0.009074687957763672, "loss": 1.2734375, "time": 1.3574390411376953, "tflops": 4.4112057773504, "tokens_per_sec": 72.93145179981953, "iter": 2990, "memory": 16133, "step": 2990} +{"base_lr": 1.612151795924825e-05, "lr": 1.612151795924825e-05, "data_time": 0.009520292282104492, "loss": 0.130859375, "time": 1.0063247680664062, "tflops": 2.943147708428074, "tokens_per_sec": 48.69203417709962, "iter": 3000, "memory": 16133, "step": 3000} +{"base_lr": 1.6093074871410968e-05, "lr": 1.6093074871410968e-05, "data_time": 0.008962154388427734, "loss": 1.484375, "time": 1.0006539821624756, "tflops": 4.77386702494688, "tokens_per_sec": 78.94836917472423, "iter": 3010, "memory": 16133, "step": 
3010} +{"base_lr": 1.6064553166504916e-05, "lr": 1.6064553166504916e-05, "data_time": 0.009066343307495117, "loss": 0.003143310546875, "time": 0.9957065582275391, "tflops": 3.825111249796361, "tokens_per_sec": 63.27165315862061, "iter": 3020, "memory": 16133, "step": 3020} +{"base_lr": 1.603595321253686e-05, "lr": 1.603595321253686e-05, "data_time": 0.007183551788330078, "loss": 1.0390625, "time": 0.9717576503753662, "tflops": 4.666673155640835, "tokens_per_sec": 77.17973711959165, "iter": 3030, "memory": 16133, "step": 3030} +{"base_lr": 1.6007275378523212e-05, "lr": 1.6007275378523212e-05, "data_time": 0.009069204330444336, "loss": 1.7890625, "time": 0.9727566242218018, "tflops": 6.2801665921740035, "tokens_per_sec": 103.8286427303391, "iter": 3040, "memory": 16133, "step": 3040} +{"base_lr": 1.5978520034485233e-05, "lr": 1.5978520034485233e-05, "data_time": 0.008836746215820312, "loss": 1.8515625, "time": 1.0088610649108887, "tflops": 4.795032396599126, "tokens_per_sec": 79.29734111305702, "iter": 3050, "memory": 16133, "step": 3050} +{"base_lr": 1.5949687551444268e-05, "lr": 1.5949687551444268e-05, "data_time": 0.008797883987426758, "loss": 0.310546875, "time": 0.9915597438812256, "tflops": 3.597037326089389, "tokens_per_sec": 59.50221392510247, "iter": 3060, "memory": 16133, "step": 3060} +{"base_lr": 1.592077830141697e-05, "lr": 1.592077830141697e-05, "data_time": 0.00867772102355957, "loss": 1.890625, "time": 1.0025248527526855, "tflops": 5.731230269551916, "tokens_per_sec": 94.760743076901, "iter": 3070, "memory": 16133, "step": 3070} +{"base_lr": 1.5891792657410487e-05, "lr": 1.5891792657410487e-05, "data_time": 0.008965492248535156, "loss": 1.1640625, "time": 1.0090394020080566, "tflops": 4.074407957102848, "tokens_per_sec": 67.39082722102626, "iter": 3080, "memory": 16133, "step": 3080} +{"base_lr": 1.586273099341766e-05, "lr": 1.586273099341766e-05, "data_time": 0.009026050567626953, "loss": 1.3203125, "time": 0.986558198928833, "tflops": 
5.0261517205683575, "tokens_per_sec": 83.11724547923208, "iter": 3090, "memory": 16133, "step": 3090} +{"base_lr": 1.583359368441219e-05, "lr": 1.583359368441219e-05, "data_time": 0.008771181106567383, "loss": 0.1572265625, "time": 1.0032927989959717, "tflops": 2.8917577914209236, "tokens_per_sec": 47.842464381272684, "iter": 3100, "memory": 16133, "step": 3100} +{"base_lr": 1.5804381106343806e-05, "lr": 1.5804381106343806e-05, "data_time": 0.009276866912841797, "loss": 0.1455078125, "time": 0.9856677055358887, "tflops": 2.943466395701466, "tokens_per_sec": 48.69795340799425, "iter": 3110, "memory": 16133, "step": 3110} +{"base_lr": 1.5775093636133404e-05, "lr": 1.5775093636133404e-05, "data_time": 0.00979924201965332, "loss": 0.271484375, "time": 1.0543925762176514, "tflops": 3.2679304155850413, "tokens_per_sec": 54.05956119723267, "iter": 3120, "memory": 16133, "step": 3120} +{"base_lr": 1.5745731651668188e-05, "lr": 1.5745731651668188e-05, "data_time": 0.008790969848632812, "loss": 0.11669921875, "time": 1.2294678688049316, "tflops": 3.688486259672898, "tokens_per_sec": 61.00200086793692, "iter": 3130, "memory": 16133, "step": 3130} +{"base_lr": 1.571629553179681e-05, "lr": 1.571629553179681e-05, "data_time": 0.009827852249145508, "loss": 0.341796875, "time": 1.064455270767212, "tflops": 3.1233742211831848, "tokens_per_sec": 51.66962061290445, "iter": 3140, "memory": 16133, "step": 3140} +{"base_lr": 1.568678565632445e-05, "lr": 1.568678565632445e-05, "data_time": 0.00871729850769043, "loss": 0.154296875, "time": 0.9792685508728027, "tflops": 2.9627008505350076, "tokens_per_sec": 49.0161763667071, "iter": 3150, "memory": 16133, "step": 3150} +{"base_lr": 1.5657202406007956e-05, "lr": 1.5657202406007956e-05, "data_time": 0.009041786193847656, "loss": 1.3984375, "time": 1.3106389045715332, "tflops": 4.383893047913124, "tokens_per_sec": 72.48373268072977, "iter": 3160, "memory": 16133, "step": 3160} +{"base_lr": 1.5627546162550886e-05, "lr": 1.5627546162550886e-05, 
"data_time": 0.009351730346679688, "loss": 0.203125, "time": 1.012131690979004, "tflops": 2.687240629968313, "tokens_per_sec": 44.46061752737771, "iter": 3170, "memory": 16133, "step": 3170} +{"base_lr": 1.5597817308598624e-05, "lr": 1.5597817308598624e-05, "data_time": 0.009191751480102539, "loss": 0.1767578125, "time": 1.2985615730285645, "tflops": 2.3273803645283073, "tokens_per_sec": 38.504142613237214, "iter": 3180, "memory": 16133, "step": 3180} +{"base_lr": 1.5568016227733425e-05, "lr": 1.5568016227733425e-05, "data_time": 0.009057044982910156, "loss": 0.005950927734375, "time": 1.324871301651001, "tflops": 2.9661020357675465, "tokens_per_sec": 49.06136914502606, "iter": 3190, "memory": 16133, "step": 3190} +{"base_lr": 1.553814330446945e-05, "lr": 1.553814330446945e-05, "data_time": 0.009456157684326172, "loss": 1.3984375, "time": 1.4827933311462402, "tflops": 3.26243812155346, "tokens_per_sec": 53.95222538403503, "iter": 3200, "memory": 16133, "step": 3200} +{"base_lr": 1.550819892424782e-05, "lr": 1.550819892424782e-05, "data_time": 0.008918523788452148, "loss": 0.05517578125, "time": 0.9879844188690186, "tflops": 4.4062505802378755, "tokens_per_sec": 72.87564320330894, "iter": 3210, "memory": 16133, "step": 3210} +{"base_lr": 1.5478183473431665e-05, "lr": 1.5478183473431665e-05, "data_time": 0.008935213088989258, "loss": 0.1396484375, "time": 0.9760961532592773, "tflops": 2.97232988664629, "tokens_per_sec": 49.17548321410169, "iter": 3220, "memory": 16133, "step": 3220} +{"base_lr": 1.5448097339301097e-05, "lr": 1.5448097339301097e-05, "data_time": 0.008611202239990234, "loss": 1.3828125, "time": 1.0212745666503906, "tflops": 4.558939520855892, "tokens_per_sec": 75.39598313161923, "iter": 3230, "memory": 16133, "step": 3230} +{"base_lr": 1.5417940910048248e-05, "lr": 1.5417940910048248e-05, "data_time": 0.008686304092407227, "loss": 0.00213623046875, "time": 1.3057105541229248, "tflops": 3.148659674681471, "tokens_per_sec": 52.07892345300452, "iter": 
3240, "memory": 16133, "step": 3240} +{"base_lr": 1.538771457477223e-05, "lr": 1.538771457477223e-05, "data_time": 0.008791685104370117, "loss": 0.2392578125, "time": 1.0038917064666748, "tflops": 4.095300461283751, "tokens_per_sec": 67.73638985351015, "iter": 3250, "memory": 16133, "step": 3250} +{"base_lr": 1.5357418723474136e-05, "lr": 1.5357418723474136e-05, "data_time": 0.008708477020263672, "loss": 0.004974365234375, "time": 1.0255343914031982, "tflops": 3.713857272169106, "tokens_per_sec": 61.43138692183511, "iter": 3260, "memory": 16133, "step": 3260} +{"base_lr": 1.5327053747052013e-05, "lr": 1.5327053747052013e-05, "data_time": 0.009549617767333984, "loss": 1.109375, "time": 1.3315346240997314, "tflops": 3.9967708875540278, "tokens_per_sec": 66.08915638181914, "iter": 3270, "memory": 16133, "step": 3270} +{"base_lr": 1.5296620037295813e-05, "lr": 1.5296620037295813e-05, "data_time": 0.00923466682434082, "loss": 1.328125, "time": 1.3701045513153076, "tflops": 4.237821783528653, "tokens_per_sec": 70.06764550031559, "iter": 3280, "memory": 16133, "step": 3280} +{"base_lr": 1.5266117986882298e-05, "lr": 1.5266117986882298e-05, "data_time": 0.008839845657348633, "loss": 0.2021484375, "time": 1.018662452697754, "tflops": 2.8481267380471387, "tokens_per_sec": 47.12061377430086, "iter": 3290, "memory": 16133, "step": 3290} +{"base_lr": 1.5235547989370041e-05, "lr": 1.5235547989370041e-05, "data_time": 0.009140491485595703, "loss": 1.28125, "time": 1.328181505203247, "tflops": 2.776487744988521, "tokens_per_sec": 45.927457776653384, "iter": 3300, "memory": 16133, "step": 3300} +{"base_lr": 1.5204910439194311e-05, "lr": 1.5204910439194311e-05, "data_time": 0.008849859237670898, "loss": 1.171875, "time": 1.2976090908050537, "tflops": 3.6347296322085816, "tokens_per_sec": 60.110552979825115, "iter": 3310, "memory": 16133, "step": 3310} +{"base_lr": 1.5174205731661983e-05, "lr": 1.5174205731661983e-05, "data_time": 0.009252786636352539, "loss": 2.078125, "time": 
1.0034749507904053, "tflops": 5.48442620242747, "tokens_per_sec": 90.68487452350605, "iter": 3320, "memory": 16133, "step": 3320} +{"base_lr": 1.5143434262946438e-05, "lr": 1.5143434262946438e-05, "data_time": 0.008826732635498047, "loss": 2.078125, "time": 1.0237200260162354, "tflops": 5.021106991369395, "tokens_per_sec": 83.03051404659044, "iter": 3330, "memory": 16133, "step": 3330} +{"base_lr": 1.5112596430082465e-05, "lr": 1.5112596430082465e-05, "data_time": 0.008801460266113281, "loss": 0.0869140625, "time": 1.0467140674591064, "tflops": 3.2341077314975704, "tokens_per_sec": 53.500761803924384, "iter": 3340, "memory": 16133, "step": 3340} +{"base_lr": 1.5081692630961124e-05, "lr": 1.5081692630961124e-05, "data_time": 0.00915670394897461, "loss": 0.15625, "time": 1.2036550045013428, "tflops": 2.711906517915904, "tokens_per_sec": 44.86335353403575, "iter": 3350, "memory": 16133, "step": 3350} +{"base_lr": 1.5050723264324618e-05, "lr": 1.5050723264324618e-05, "data_time": 0.008967876434326172, "loss": 0.1337890625, "time": 0.9809086322784424, "tflops": 3.1427316301583814, "tokens_per_sec": 51.9926100369673, "iter": 3360, "memory": 16133, "step": 3360} +{"base_lr": 1.5019688729761144e-05, "lr": 1.5019688729761144e-05, "data_time": 0.008917808532714844, "loss": 1.4140625, "time": 1.201887845993042, "tflops": 3.571703746891272, "tokens_per_sec": 59.073731577074255, "iter": 3370, "memory": 16133, "step": 3370} +{"base_lr": 1.4988589427699757e-05, "lr": 1.4988589427699757e-05, "data_time": 0.009138822555541992, "loss": 0.6328125, "time": 0.9920437335968018, "tflops": 2.8635821300028774, "tokens_per_sec": 47.37694358448004, "iter": 3380, "memory": 16133, "step": 3380} +{"base_lr": 1.495742575940516e-05, "lr": 1.495742575940516e-05, "data_time": 0.009204626083374023, "loss": 0.1318359375, "time": 1.016796588897705, "tflops": 3.6267624346715546, "tokens_per_sec": 59.9923334381651, "iter": 3390, "memory": 16133, "step": 3390} +{"base_lr": 1.492619812697257e-05, "lr": 
1.492619812697257e-05, "data_time": 0.008824825286865234, "loss": 1.4453125, "time": 0.9841251373291016, "tflops": 4.731033490500384, "tokens_per_sec": 78.24208231170522, "iter": 3400, "memory": 16133, "step": 3400} +{"base_lr": 1.489490693332252e-05, "lr": 1.489490693332252e-05, "data_time": 0.00846552848815918, "loss": 1.03125, "time": 0.9908437728881836, "tflops": 4.821132432313586, "tokens_per_sec": 79.7300262276921, "iter": 3410, "memory": 16133, "step": 3410} +{"base_lr": 1.4863552582195641e-05, "lr": 1.4863552582195641e-05, "data_time": 0.008738517761230469, "loss": 0.001007080078125, "time": 1.31965970993042, "tflops": 2.9778157470339215, "tokens_per_sec": 49.25512199154577, "iter": 3420, "memory": 16133, "step": 3420} +{"base_lr": 1.4832135478147472e-05, "lr": 1.4832135478147472e-05, "data_time": 0.008841514587402344, "loss": 1.46875, "time": 0.985058069229126, "tflops": 5.402563551949068, "tokens_per_sec": 89.33483491869325, "iter": 3430, "memory": 16133, "step": 3430} +{"base_lr": 1.4800656026543233e-05, "lr": 1.4800656026543233e-05, "data_time": 0.0086517333984375, "loss": 2.0, "time": 0.9856381416320801, "tflops": 6.259540046339592, "tokens_per_sec": 103.48625493631837, "iter": 3440, "memory": 16133, "step": 3440} +{"base_lr": 1.47691146335526e-05, "lr": 1.47691146335526e-05, "data_time": 0.009305715560913086, "loss": 1.25, "time": 0.9947657585144043, "tflops": 5.9585685794232255, "tokens_per_sec": 98.5156547268534, "iter": 3450, "memory": 16133, "step": 3450} +{"base_lr": 1.4737511706144447e-05, "lr": 1.4737511706144447e-05, "data_time": 0.009385347366333008, "loss": 1.8515625, "time": 1.0001778602600098, "tflops": 4.655101026068476, "tokens_per_sec": 76.98630719530806, "iter": 3460, "memory": 16133, "step": 3460} +{"base_lr": 1.4705847652081612e-05, "lr": 1.4705847652081612e-05, "data_time": 0.008870124816894531, "loss": 0.1357421875, "time": 0.998504638671875, "tflops": 2.9056247274245286, "tokens_per_sec": 48.071884837508016, "iter": 3470, 
"memory": 16133, "step": 3470} +{"base_lr": 1.467412287991563e-05, "lr": 1.467412287991563e-05, "data_time": 0.008509159088134766, "loss": 1.1015625, "time": 1.0126943588256836, "tflops": 4.537796936762116, "tokens_per_sec": 75.04732236096807, "iter": 3480, "memory": 16133, "step": 3480} +{"base_lr": 1.4642337798981483e-05, "lr": 1.4642337798981483e-05, "data_time": 0.009043455123901367, "loss": 0.1298828125, "time": 0.9830343723297119, "tflops": 3.1974671047357432, "tokens_per_sec": 52.89743824187074, "iter": 3490, "memory": 16133, "step": 3490} +{"base_lr": 1.4610492819392272e-05, "lr": 1.4610492819392272e-05, "data_time": 0.008821249008178711, "loss": 1.2421875, "time": 1.0021586418151855, "tflops": 4.464713211236948, "tokens_per_sec": 73.84060458321424, "iter": 3500, "memory": 16133, "step": 3500} +{"base_lr": 1.4578588352033964e-05, "lr": 1.4578588352033964e-05, "data_time": 0.010370731353759766, "loss": 0.034423828125, "time": 1.3368675708770752, "tflops": 2.7131915062144962, "tokens_per_sec": 44.88103482126586, "iter": 3510, "memory": 16133, "step": 3510} +{"base_lr": 1.4546624808560078e-05, "lr": 1.4546624808560078e-05, "data_time": 0.008873224258422852, "loss": 0.228515625, "time": 1.2724571228027344, "tflops": 2.993176185759351, "tokens_per_sec": 49.51050913305878, "iter": 3520, "memory": 16133, "step": 3520} +{"base_lr": 1.4514602601386363e-05, "lr": 1.4514602601386363e-05, "data_time": 0.008640050888061523, "loss": 0.94140625, "time": 1.2463462352752686, "tflops": 3.7356625725434585, "tokens_per_sec": 61.78058537877475, "iter": 3530, "memory": 16133, "step": 3530} +{"base_lr": 1.4482522143685513e-05, "lr": 1.4482522143685513e-05, "data_time": 0.00894927978515625, "loss": 0.06005859375, "time": 1.2745084762573242, "tflops": 4.223107035704852, "tokens_per_sec": 69.83084197390697, "iter": 3540, "memory": 16133, "step": 3540} +{"base_lr": 1.4450383849381785e-05, "lr": 1.4450383849381785e-05, "data_time": 0.00927734375, "loss": 1.640625, "time": 
1.0097129344940186, "tflops": 4.371369296941274, "tokens_per_sec": 72.2977764333672, "iter": 3550, "memory": 16133, "step": 3550} +{"base_lr": 1.4418188133145694e-05, "lr": 1.4418188133145694e-05, "data_time": 0.009581565856933594, "loss": 0.11865234375, "time": 1.4085474014282227, "tflops": 2.188589877651272, "tokens_per_sec": 36.20751417258049, "iter": 3560, "memory": 16133, "step": 3560} +{"base_lr": 1.4385935410388659e-05, "lr": 1.4385935410388659e-05, "data_time": 0.008999109268188477, "loss": 0.328125, "time": 1.0602531433105469, "tflops": 2.508236052576896, "tokens_per_sec": 41.49952327665106, "iter": 3570, "memory": 16133, "step": 3570} +{"base_lr": 1.4353626097257624e-05, "lr": 1.4353626097257624e-05, "data_time": 0.009169816970825195, "loss": 3.25, "time": 1.007554531097412, "tflops": 3.9602952535481766, "tokens_per_sec": 65.50513938739212, "iter": 3580, "memory": 16133, "step": 3580} +{"base_lr": 1.432126061062971e-05, "lr": 1.432126061062971e-05, "data_time": 0.009079456329345703, "loss": 0.48828125, "time": 1.02058744430542, "tflops": 2.6057200426557037, "tokens_per_sec": 43.112425344309344, "iter": 3590, "memory": 16133, "step": 3590} +{"base_lr": 1.428883936810682e-05, "lr": 1.428883936810682e-05, "data_time": 0.00922250747680664, "loss": 0.0027923583984375, "time": 1.0277109146118164, "tflops": 5.0605182651770795, "tokens_per_sec": 83.68111963897937, "iter": 3600, "memory": 16133, "step": 3600} +{"base_lr": 1.4256362788010269e-05, "lr": 1.4256362788010269e-05, "data_time": 0.008763551712036133, "loss": 0.034423828125, "time": 1.276289701461792, "tflops": 3.3160726186454963, "tokens_per_sec": 54.84648189182362, "iter": 3610, "memory": 16133, "step": 3610} +{"base_lr": 1.4223831289375365e-05, "lr": 1.4223831289375365e-05, "data_time": 0.008897066116333008, "loss": 2.421875, "time": 1.2333135604858398, "tflops": 3.2844242736397153, "tokens_per_sec": 54.32519526790278, "iter": 3620, "memory": 16133, "step": 3620} +{"base_lr": 1.4191245291946015e-05, 
"lr": 1.4191245291946015e-05, "data_time": 0.009441137313842773, "loss": 1.0234375, "time": 0.9884388446807861, "tflops": 4.89410297455213, "tokens_per_sec": 80.93571031777375, "iter": 3630, "memory": 16133, "step": 3630} +{"base_lr": 1.4158605216169312e-05, "lr": 1.4158605216169312e-05, "data_time": 0.009128093719482422, "loss": 0.00421142578125, "time": 1.011660099029541, "tflops": 4.1834894314164615, "tokens_per_sec": 69.19320043073752, "iter": 3640, "memory": 16133, "step": 3640} +{"base_lr": 1.41259114831901e-05, "lr": 1.41259114831901e-05, "data_time": 0.008787870407104492, "loss": 0.00653076171875, "time": 1.333857536315918, "tflops": 3.172954545180478, "tokens_per_sec": 52.47936761918804, "iter": 3650, "memory": 16133, "step": 3650} +{"base_lr": 1.4093164514845549e-05, "lr": 1.4093164514845549e-05, "data_time": 0.009134769439697266, "loss": 1.0859375, "time": 1.006432056427002, "tflops": 4.6863155672082915, "tokens_per_sec": 77.50150594053534, "iter": 3660, "memory": 16133, "step": 3660} +{"base_lr": 1.4060364733659713e-05, "lr": 1.4060364733659713e-05, "data_time": 0.009139537811279297, "loss": 0.1640625, "time": 0.9896116256713867, "tflops": 3.6041183402313397, "tokens_per_sec": 59.619348105285994, "iter": 3670, "memory": 16134, "step": 3670} +{"base_lr": 1.4027512562838062e-05, "lr": 1.4027512562838062e-05, "data_time": 0.00873255729675293, "loss": 0.1845703125, "time": 1.0156748294830322, "tflops": 3.0351570162726618, "tokens_per_sec": 50.21292102503736, "iter": 3680, "memory": 16133, "step": 3680} +{"base_lr": 1.3994608426262038e-05, "lr": 1.3994608426262038e-05, "data_time": 0.010228395462036133, "loss": 0.09423828125, "time": 0.9775562286376953, "tflops": 3.7104543267972, "tokens_per_sec": 61.37754355425011, "iter": 3690, "memory": 16133, "step": 3690} +{"base_lr": 1.3961652748483592e-05, "lr": 1.3961652748483592e-05, "data_time": 0.009495019912719727, "loss": 0.82421875, "time": 1.0172357559204102, "tflops": 5.172147660565009, "tokens_per_sec": 
85.52589652257707, "iter": 3700, "memory": 16133, "step": 3700} +{"base_lr": 1.3928645954719679e-05, "lr": 1.3928645954719679e-05, "data_time": 0.009466171264648438, "loss": 0.1337890625, "time": 1.3928873538970947, "tflops": 2.082924911663934, "tokens_per_sec": 34.46079100773553, "iter": 3710, "memory": 16133, "step": 3710} +{"base_lr": 1.3895588470846793e-05, "lr": 1.3895588470846793e-05, "data_time": 0.009355783462524414, "loss": 1.1328125, "time": 1.010042428970337, "tflops": 4.609637031083698, "tokens_per_sec": 76.23442123953103, "iter": 3720, "memory": 16133, "step": 3720} +{"base_lr": 1.3862480723395475e-05, "lr": 1.3862480723395475e-05, "data_time": 0.008955955505371094, "loss": 0.1298828125, "time": 0.9841020107269287, "tflops": 2.9481494163699122, "tokens_per_sec": 48.77543128328227, "iter": 3730, "memory": 16133, "step": 3730} +{"base_lr": 1.3829323139544796e-05, "lr": 1.3829323139544796e-05, "data_time": 0.009054899215698242, "loss": 0.361328125, "time": 1.03218412399292, "tflops": 2.9866111222789535, "tokens_per_sec": 49.40978921731644, "iter": 3740, "memory": 16133, "step": 3740} +{"base_lr": 1.3796116147116842e-05, "lr": 1.3796116147116842e-05, "data_time": 0.009134531021118164, "loss": 1.40625, "time": 1.2415947914123535, "tflops": 4.530132838406418, "tokens_per_sec": 74.9036647408408, "iter": 3750, "memory": 16133, "step": 3750} +{"base_lr": 1.3762860174571214e-05, "lr": 1.3762860174571214e-05, "data_time": 0.008994340896606445, "loss": 1.8984375, "time": 1.004883050918579, "tflops": 7.52639141547754, "tokens_per_sec": 124.39258467501384, "iter": 3760, "memory": 16133, "step": 3760} +{"base_lr": 1.372955565099949e-05, "lr": 1.372955565099949e-05, "data_time": 0.008923053741455078, "loss": 0.1865234375, "time": 1.3465838432312012, "tflops": 2.0647204898927107, "tokens_per_sec": 34.16051680048851, "iter": 3770, "memory": 16133, "step": 3770} +{"base_lr": 1.3696203006119672e-05, "lr": 1.3696203006119672e-05, "data_time": 0.00899505615234375, "loss": 
0.00872802734375, "time": 1.0058338642120361, "tflops": 3.7264436458048436, "tokens_per_sec": 61.640398286359904, "iter": 3780, "memory": 16133, "step": 3780} +{"base_lr": 1.3662802670270674e-05, "lr": 1.3662802670270674e-05, "data_time": 0.008962392807006836, "loss": 2.0, "time": 1.2341103553771973, "tflops": 4.557606466929228, "tokens_per_sec": 75.35792856344669, "iter": 3790, "memory": 16133, "step": 3790} +{"base_lr": 1.3629355074406739e-05, "lr": 1.3629355074406739e-05, "data_time": 0.00923776626586914, "loss": 1.3671875, "time": 1.3311965465545654, "tflops": 2.9520084582010067, "tokens_per_sec": 48.82825167191555, "iter": 3800, "memory": 16133, "step": 3800} +{"base_lr": 1.35958606500919e-05, "lr": 1.35958606500919e-05, "data_time": 0.00881505012512207, "loss": 0.2138671875, "time": 1.0090088844299316, "tflops": 2.8753758399385005, "tokens_per_sec": 47.5714344448725, "iter": 3810, "memory": 16133, "step": 3810} +{"base_lr": 1.3562319829494396e-05, "lr": 1.3562319829494396e-05, "data_time": 0.009497642517089844, "loss": 1.2265625, "time": 0.9851703643798828, "tflops": 5.401947737963513, "tokens_per_sec": 89.32465204158107, "iter": 3820, "memory": 16133, "step": 3820} +{"base_lr": 1.3528733045381118e-05, "lr": 1.3528733045381118e-05, "data_time": 0.008884429931640625, "loss": 0.0019989013671875, "time": 0.9838395118713379, "tflops": 3.8097506420522387, "tokens_per_sec": 63.0184082381569, "iter": 3830, "memory": 16133, "step": 3830} +{"base_lr": 1.3495100731111991e-05, "lr": 1.3495100731111991e-05, "data_time": 0.00897836685180664, "loss": 1.15625, "time": 1.2703604698181152, "tflops": 4.475221318340848, "tokens_per_sec": 73.99474576958815, "iter": 3840, "memory": 16133, "step": 3840} +{"base_lr": 1.346142332063441e-05, "lr": 1.346142332063441e-05, "data_time": 0.009162425994873047, "loss": 0.06201171875, "time": 1.0033824443817139, "tflops": 4.700558834550458, "tokens_per_sec": 77.73705872239573, "iter": 3850, "memory": 16133, "step": 3850} +{"base_lr": 
1.3427701248477623e-05, "lr": 1.3427701248477623e-05, "data_time": 0.009229421615600586, "loss": 1.53125, "time": 0.9922597408294678, "tflops": 4.1433084497967405, "tokens_per_sec": 68.53044339286373, "iter": 3860, "memory": 16133, "step": 3860} +{"base_lr": 1.3393934949747153e-05, "lr": 1.3393934949747153e-05, "data_time": 0.008948326110839844, "loss": 1.4765625, "time": 1.2091026306152344, "tflops": 4.201187874351976, "tokens_per_sec": 69.4730107047971, "iter": 3870, "memory": 16133, "step": 3870} +{"base_lr": 1.3360124860119159e-05, "lr": 1.3360124860119159e-05, "data_time": 0.008706092834472656, "loss": 0.0255126953125, "time": 1.1507222652435303, "tflops": 3.520158701553598, "tokens_per_sec": 58.2243014006185, "iter": 3880, "memory": 16133, "step": 3880} +{"base_lr": 1.3326271415834807e-05, "lr": 1.3326271415834807e-05, "data_time": 0.008657693862915039, "loss": 0.19140625, "time": 1.152904987335205, "tflops": 2.8312826190247957, "tokens_per_sec": 46.8382048765071, "iter": 3890, "memory": 16133, "step": 3890} +{"base_lr": 1.3292375053694664e-05, "lr": 1.3292375053694664e-05, "data_time": 0.0090789794921875, "loss": 0.1435546875, "time": 0.981360673904419, "tflops": 3.4494820795786807, "tokens_per_sec": 57.06362756227293, "iter": 3900, "memory": 16133, "step": 3900} +{"base_lr": 1.3258436211053052e-05, "lr": 1.3258436211053052e-05, "data_time": 0.009073495864868164, "loss": 1.0546875, "time": 1.2440695762634277, "tflops": 3.5965439660594782, "tokens_per_sec": 59.48220373831508, "iter": 3910, "memory": 16133, "step": 3910} +{"base_lr": 1.3224455325812411e-05, "lr": 1.3224455325812411e-05, "data_time": 0.00881052017211914, "loss": 0.2275390625, "time": 0.9861588478088379, "tflops": 3.1873364776199935, "tokens_per_sec": 52.729841764830184, "iter": 3920, "memory": 16133, "step": 3920} +{"base_lr": 1.319043283641764e-05, "lr": 1.319043283641764e-05, "data_time": 0.009159326553344727, "loss": 0.9296875, "time": 1.0149836540222168, "tflops": 5.064325676049185, 
"tokens_per_sec": 83.74519103147615, "iter": 3930, "memory": 16133, "step": 3930} +{"base_lr": 1.3156369181850437e-05, "lr": 1.3156369181850437e-05, "data_time": 0.00910806655883789, "loss": 1.015625, "time": 1.0018503665924072, "tflops": 4.0432435123477415, "tokens_per_sec": 66.87625441294209, "iter": 3940, "memory": 16133, "step": 3940} +{"base_lr": 1.3122264801623653e-05, "lr": 1.3122264801623653e-05, "data_time": 0.009016990661621094, "loss": 0.01165771484375, "time": 1.0115966796875, "tflops": 4.123929063306735, "tokens_per_sec": 68.20900205133839, "iter": 3950, "memory": 16133, "step": 3950} +{"base_lr": 1.30881201357756e-05, "lr": 1.30881201357756e-05, "data_time": 0.009587764739990234, "loss": 0.138671875, "time": 1.0010924339294434, "tflops": 2.898113770758538, "tokens_per_sec": 47.947620392599106, "iter": 3960, "memory": 16133, "step": 3960} +{"base_lr": 1.3053935624864385e-05, "lr": 1.3053935624864385e-05, "data_time": 0.009809017181396484, "loss": 0.94921875, "time": 1.0106091499328613, "tflops": 4.427380187647045, "tokens_per_sec": 73.22316446951116, "iter": 3970, "memory": 16133, "step": 3970} +{"base_lr": 1.3019711709962229e-05, "lr": 1.3019711709962229e-05, "data_time": 0.009257078170776367, "loss": 0.03271484375, "time": 0.9830799102783203, "tflops": 4.797634621620864, "tokens_per_sec": 79.34248191262299, "iter": 3980, "memory": 16133, "step": 3980} +{"base_lr": 1.2985448832649766e-05, "lr": 1.2985448832649766e-05, "data_time": 0.008626461029052734, "loss": 0.0142822265625, "time": 1.0226378440856934, "tflops": 4.020228854573343, "tokens_per_sec": 66.49470327467692, "iter": 3990, "memory": 16133, "step": 3990} +{"base_lr": 1.2951147435010356e-05, "lr": 1.2951147435010356e-05, "data_time": 0.008900642395019531, "loss": 1.8046875, "time": 1.0054302215576172, "tflops": 4.089033809032278, "tokens_per_sec": 67.63273924130354, "iter": 4000, "memory": 16133, "step": 4000} +{"base_lr": 1.2916807959624364e-05, "lr": 1.2916807959624364e-05, "data_time": 
0.009252786636352539, "loss": 1.296875, "time": 1.0037798881530762, "tflops": 5.543083785534008, "tokens_per_sec": 91.65355979505176, "iter": 4010, "memory": 16133, "step": 4010} +{"base_lr": 1.2882430849563464e-05, "lr": 1.2882430849563464e-05, "data_time": 0.008749246597290039, "loss": 1.0859375, "time": 0.9890079498291016, "tflops": 4.707675994264583, "tokens_per_sec": 77.85579480248624, "iter": 4020, "memory": 16133, "step": 4020} +{"base_lr": 1.2848016548384929e-05, "lr": 1.2848016548384929e-05, "data_time": 0.008770227432250977, "loss": 1.0859375, "time": 0.9968030452728271, "tflops": 5.399648143805008, "tokens_per_sec": 89.28544151422734, "iter": 4030, "memory": 16133, "step": 4030} +{"base_lr": 1.2813565500125892e-05, "lr": 1.2813565500125892e-05, "data_time": 0.009089469909667969, "loss": 1.390625, "time": 0.9872255325317383, "tflops": 4.470942024009726, "tokens_per_sec": 73.94460292443782, "iter": 4040, "memory": 16133, "step": 4040} +{"base_lr": 1.2779078149297606e-05, "lr": 1.2779078149297606e-05, "data_time": 0.00900125503540039, "loss": 0.66015625, "time": 0.9902074337005615, "tflops": 3.7852505287913893, "tokens_per_sec": 62.613143357481775, "iter": 4050, "memory": 16133, "step": 4050} +{"base_lr": 1.2744554940879755e-05, "lr": 1.2744554940879755e-05, "data_time": 0.009060144424438477, "loss": 0.17578125, "time": 0.9813299179077148, "tflops": 3.0181108116867876, "tokens_per_sec": 49.9322390011532, "iter": 4060, "memory": 16133, "step": 4060} +{"base_lr": 1.2709996320314655e-05, "lr": 1.2709996320314655e-05, "data_time": 0.00896763801574707, "loss": 0.17578125, "time": 0.9897778034210205, "tflops": 2.9923508335225484, "tokens_per_sec": 49.5060606841144, "iter": 4070, "memory": 16133, "step": 4070} +{"base_lr": 1.2675402733501543e-05, "lr": 1.2675402733501543e-05, "data_time": 0.009120941162109375, "loss": 0.04345703125, "time": 0.9844691753387451, "tflops": 3.930234897417405, "tokens_per_sec": 65.00965353020148, "iter": 4080, "memory": 16133, "step": 
4080} +{"base_lr": 1.2640774626790823e-05, "lr": 1.2640774626790823e-05, "data_time": 0.009131669998168945, "loss": 0.126953125, "time": 0.9973714351654053, "tflops": 4.304100931228068, "tokens_per_sec": 71.1871199601321, "iter": 4090, "memory": 16133, "step": 4090} +{"base_lr": 1.2606112446978292e-05, "lr": 1.2606112446978292e-05, "data_time": 0.009125709533691406, "loss": 1.5, "time": 0.9915773868560791, "tflops": 4.512356763251712, "tokens_per_sec": 74.62856755391698, "iter": 4100, "memory": 16133, "step": 4100} +{"base_lr": 1.2571416641299383e-05, "lr": 1.2571416641299383e-05, "data_time": 0.009391069412231445, "loss": 1.6484375, "time": 1.011131763458252, "tflops": 4.185675384186027, "tokens_per_sec": 69.22935519354887, "iter": 4110, "memory": 16133, "step": 4110} +{"base_lr": 1.2536687657423391e-05, "lr": 1.2536687657423391e-05, "data_time": 0.009079217910766602, "loss": 0.00994873046875, "time": 0.9986855983734131, "tflops": 4.843888304581704, "tokens_per_sec": 80.10529052408297, "iter": 4120, "memory": 16133, "step": 4120} +{"base_lr": 1.2501925943447699e-05, "lr": 1.2501925943447699e-05, "data_time": 0.009150981903076172, "loss": 0.05322265625, "time": 1.012336015701294, "tflops": 4.06113988320754, "tokens_per_sec": 67.17137288929304, "iter": 4130, "memory": 16133, "step": 4130} +{"base_lr": 1.2467131947892006e-05, "lr": 1.2467131947892006e-05, "data_time": 0.010026216506958008, "loss": 0.004241943359375, "time": 1.0613949298858643, "tflops": 4.50067068752739, "tokens_per_sec": 74.43035365584488, "iter": 4140, "memory": 16133, "step": 4140} +{"base_lr": 1.243230611969251e-05, "lr": 1.243230611969251e-05, "data_time": 0.009114265441894531, "loss": 1.6328125, "time": 1.0191013813018799, "tflops": 4.628055951946801, "tokens_per_sec": 76.5380181315034, "iter": 4150, "memory": 16133, "step": 4150} +{"base_lr": 1.2397448908196162e-05, "lr": 1.2397448908196162e-05, "data_time": 0.008903741836547852, "loss": 1.609375, "time": 1.6103813648223877, "tflops": 
4.282575536825313, "tokens_per_sec": 70.79068504527963, "iter": 4160, "memory": 16136, "step": 4160} +{"base_lr": 1.2362560763154815e-05, "lr": 1.2362560763154815e-05, "data_time": 0.008901834487915039, "loss": 0.0257568359375, "time": 1.0015063285827637, "tflops": 5.313834440668632, "tokens_per_sec": 87.86764245857671, "iter": 4170, "memory": 16133, "step": 4170} +{"base_lr": 1.2327642134719464e-05, "lr": 1.2327642134719464e-05, "data_time": 0.009127616882324219, "loss": 1.3984375, "time": 1.2856106758117676, "tflops": 3.1037494494026143, "tokens_per_sec": 51.33747038797306, "iter": 4180, "memory": 16133, "step": 4180} +{"base_lr": 1.229269347343442e-05, "lr": 1.229269347343442e-05, "data_time": 0.008862495422363281, "loss": 2.296875, "time": 1.3148260116577148, "tflops": 4.646298141039297, "tokens_per_sec": 76.81624724824522, "iter": 4190, "memory": 16133, "step": 4190} +{"base_lr": 1.22577152302315e-05, "lr": 1.22577152302315e-05, "data_time": 0.009611845016479492, "loss": 0.007049560546875, "time": 1.008434772491455, "tflops": 4.436926462594263, "tokens_per_sec": 73.3810475585853, "iter": 4200, "memory": 16133, "step": 4200} +{"base_lr": 1.2222707856424208e-05, "lr": 1.2222707856424208e-05, "data_time": 0.009228229522705078, "loss": 1.71875, "time": 1.0133864879608154, "tflops": 5.55029044041086, "tokens_per_sec": 91.77150189464955, "iter": 4210, "memory": 16133, "step": 4210} +{"base_lr": 1.2187671803701902e-05, "lr": 1.2187671803701902e-05, "data_time": 0.009535789489746094, "loss": 0.201171875, "time": 1.2284510135650635, "tflops": 3.642270532940041, "tokens_per_sec": 60.2384622445675, "iter": 4220, "memory": 16133, "step": 4220} +{"base_lr": 1.2152607524123983e-05, "lr": 1.2152607524123983e-05, "data_time": 0.009298086166381836, "loss": 0.875, "time": 0.9663305282592773, "tflops": 5.006073334585817, "tokens_per_sec": 82.78740830430674, "iter": 4230, "memory": 16133, "step": 4230} +{"base_lr": 1.2117515470114048e-05, "lr": 1.2117515470114048e-05, 
"data_time": 0.009401321411132812, "loss": 1.2421875, "time": 0.9997000694274902, "tflops": 3.6282659661248093, "tokens_per_sec": 60.018001233410814, "iter": 4240, "memory": 16133, "step": 4240} +{"base_lr": 1.2082396094454069e-05, "lr": 1.2082396094454069e-05, "data_time": 0.009211063385009766, "loss": 1.2109375, "time": 1.0195677280426025, "tflops": 4.0323345428604815, "tokens_per_sec": 66.6949317143274, "iter": 4250, "memory": 16133, "step": 4250} +{"base_lr": 1.2047249850278538e-05, "lr": 1.2047249850278538e-05, "data_time": 0.009305953979492188, "loss": 1.4609375, "time": 1.031794548034668, "tflops": 4.219160614096897, "tokens_per_sec": 69.7813340234, "iter": 4260, "memory": 16133, "step": 4260} +{"base_lr": 1.201207719106862e-05, "lr": 1.201207719106862e-05, "data_time": 0.00943899154663086, "loss": 0.1552734375, "time": 0.9861104488372803, "tflops": 3.3715210666683055, "tokens_per_sec": 55.774685345637046, "iter": 4270, "memory": 16133, "step": 4270} +{"base_lr": 1.1976878570646298e-05, "lr": 1.1976878570646298e-05, "data_time": 0.009577751159667969, "loss": 0.1416015625, "time": 1.2193880081176758, "tflops": 2.379291701459504, "tokens_per_sec": 39.36400856857405, "iter": 4280, "memory": 16133, "step": 4280} +{"base_lr": 1.1941654443168541e-05, "lr": 1.1941654443168541e-05, "data_time": 0.00910806655883789, "loss": 0.99609375, "time": 1.0118343830108643, "tflops": 4.960424879055674, "tokens_per_sec": 82.02923461934459, "iter": 4290, "memory": 16133, "step": 4290} +{"base_lr": 1.190640526312141e-05, "lr": 1.190640526312141e-05, "data_time": 0.009569883346557617, "loss": 0.1435546875, "time": 1.3656890392303467, "tflops": 2.1244073030043005, "tokens_per_sec": 35.14709324094446, "iter": 4300, "memory": 16133, "step": 4300} +{"base_lr": 1.1871131485314226e-05, "lr": 1.1871131485314226e-05, "data_time": 0.00888204574584961, "loss": 0.77734375, "time": 0.9838275909423828, "tflops": 5.163168178401214, "tokens_per_sec": 85.38081344054726, "iter": 4310, "memory": 
16133, "step": 4310} +{"base_lr": 1.1835833564873684e-05, "lr": 1.1835833564873684e-05, "data_time": 0.009006977081298828, "loss": 1.15625, "time": 0.9757585525512695, "tflops": 4.647538398652238, "tokens_per_sec": 76.86327709229317, "iter": 4320, "memory": 16133, "step": 4320} +{"base_lr": 1.1800511957237978e-05, "lr": 1.1800511957237978e-05, "data_time": 0.009151697158813477, "loss": 1.125, "time": 0.9888310432434082, "tflops": 3.668147114737654, "tokens_per_sec": 60.677706681959286, "iter": 4330, "memory": 16133, "step": 4330} +{"base_lr": 1.1765167118150939e-05, "lr": 1.1765167118150939e-05, "data_time": 0.009471893310546875, "loss": 0.984375, "time": 0.996485710144043, "tflops": 2.850817306048984, "tokens_per_sec": 47.16575413124483, "iter": 4340, "memory": 16133, "step": 4340} +{"base_lr": 1.172979950365613e-05, "lr": 1.172979950365613e-05, "data_time": 0.009004831314086914, "loss": 1.1015625, "time": 0.9955077171325684, "tflops": 5.163403247782491, "tokens_per_sec": 85.38356713571861, "iter": 4350, "memory": 16133, "step": 4350} +{"base_lr": 1.1694409570091004e-05, "lr": 1.1694409570091004e-05, "data_time": 0.009333610534667969, "loss": 1.1640625, "time": 0.9829099178314209, "tflops": 3.8133537407962534, "tokens_per_sec": 63.07800834559343, "iter": 4360, "memory": 16133, "step": 4360} +{"base_lr": 1.1658997774080974e-05, "lr": 1.1658997774080974e-05, "data_time": 0.009241342544555664, "loss": 1.171875, "time": 1.3417644500732422, "tflops": 4.643296957995326, "tokens_per_sec": 76.76459157514631, "iter": 4370, "memory": 16133, "step": 4370} +{"base_lr": 1.1623564572533535e-05, "lr": 1.1623564572533535e-05, "data_time": 0.009459972381591797, "loss": 0.1279296875, "time": 1.0478911399841309, "tflops": 3.461407010552134, "tokens_per_sec": 57.25785600291589, "iter": 4380, "memory": 16133, "step": 4380} +{"base_lr": 1.1588110422632374e-05, "lr": 1.1588110422632374e-05, "data_time": 0.009249210357666016, "loss": 1.109375, "time": 1.0080795288085938, "tflops": 
4.378452289164265, "tokens_per_sec": 72.41492155505148, "iter": 4390, "memory": 16133, "step": 4390} +{"base_lr": 1.1552635781831467e-05, "lr": 1.1552635781831467e-05, "data_time": 0.009271383285522461, "loss": 0.045166015625, "time": 1.1679422855377197, "tflops": 3.4682578456379645, "tokens_per_sec": 57.36584832108881, "iter": 4400, "memory": 16133, "step": 4400} +{"base_lr": 1.1517141107849187e-05, "lr": 1.1517141107849187e-05, "data_time": 0.00890660285949707, "loss": 0.1474609375, "time": 0.9980387687683105, "tflops": 2.906981030560156, "tokens_per_sec": 48.09432409042504, "iter": 4410, "memory": 16133, "step": 4410} +{"base_lr": 1.1481626858662382e-05, "lr": 1.1481626858662382e-05, "data_time": 0.009180068969726562, "loss": 2.59375, "time": 0.9995388984680176, "tflops": 5.44544511363878, "tokens_per_sec": 90.04151828193179, "iter": 4420, "memory": 16133, "step": 4420} +{"base_lr": 1.144609349250047e-05, "lr": 1.144609349250047e-05, "data_time": 0.008986473083496094, "loss": 0.1875, "time": 1.002075433731079, "tflops": 2.6538572540468284, "tokens_per_sec": 43.90887004996083, "iter": 4430, "memory": 16133, "step": 4430} +{"base_lr": 1.1410541467839533e-05, "lr": 1.1410541467839533e-05, "data_time": 0.007593631744384766, "loss": 0.0234375, "time": 1.341017723083496, "tflops": 3.2462709805944168, "tokens_per_sec": 53.69056557611458, "iter": 4440, "memory": 16133, "step": 4440} +{"base_lr": 1.1374971243396397e-05, "lr": 1.1374971243396397e-05, "data_time": 0.010661125183105469, "loss": 0.07666015625, "time": 1.0108795166015625, "tflops": 3.887410319844281, "tokens_per_sec": 64.30044227076311, "iter": 4450, "memory": 16133, "step": 4450} +{"base_lr": 1.1339383278122715e-05, "lr": 1.1339383278122715e-05, "data_time": 0.008971929550170898, "loss": 1.203125, "time": 1.0368342399597168, "tflops": 4.7240487899257415, "tokens_per_sec": 78.1224200341502, "iter": 4460, "memory": 16133, "step": 4460} +{"base_lr": 1.130377803119904e-05, "lr": 1.130377803119904e-05, 
"data_time": 0.009309530258178711, "loss": 1.2578125, "time": 0.9963138103485107, "tflops": 5.280759415937754, "tokens_per_sec": 87.32188502885458, "iter": 4470, "memory": 16133, "step": 4470} +{"base_lr": 1.1268155962028914e-05, "lr": 1.1268155962028914e-05, "data_time": 0.0089874267578125, "loss": 1.3984375, "time": 1.6541407108306885, "tflops": 3.6932007136735105, "tokens_per_sec": 61.058892595188, "iter": 4480, "memory": 16133, "step": 4480} +{"base_lr": 1.123251753023293e-05, "lr": 1.123251753023293e-05, "data_time": 0.009059667587280273, "loss": 1.15625, "time": 0.9944314956665039, "tflops": 6.508748321418948, "tokens_per_sec": 107.59916642440729, "iter": 4490, "memory": 16133, "step": 4490} +{"base_lr": 1.1196863195642791e-05, "lr": 1.1196863195642791e-05, "data_time": 0.008948087692260742, "loss": 0.0035552978515625, "time": 1.2272980213165283, "tflops": 3.1526125204686175, "tokens_per_sec": 52.14707339892453, "iter": 4500, "memory": 16133, "step": 4500} +{"base_lr": 1.1161193418295398e-05, "lr": 1.1161193418295398e-05, "data_time": 0.00943446159362793, "loss": 0.138671875, "time": 1.2930543422698975, "tflops": 2.337292879657515, "tokens_per_sec": 38.668135101103815, "iter": 4510, "memory": 16133, "step": 4510} +{"base_lr": 1.1125508658426907e-05, "lr": 1.1125508658426907e-05, "data_time": 0.009380817413330078, "loss": 0.1396484375, "time": 0.9910192489624023, "tflops": 5.247879756281161, "tokens_per_sec": 86.77934368071585, "iter": 4520, "memory": 16133, "step": 4520} +{"base_lr": 1.108980937646679e-05, "lr": 1.108980937646679e-05, "data_time": 0.009600400924682617, "loss": 0.1435546875, "time": 1.2192633152008057, "tflops": 2.2803271596995573, "tokens_per_sec": 37.72769952681332, "iter": 4530, "memory": 16133, "step": 4530} +{"base_lr": 1.1054096033031876e-05, "lr": 1.1054096033031876e-05, "data_time": 0.009891748428344727, "loss": 1.359375, "time": 1.2980077266693115, "tflops": 3.3072124569643253, "tokens_per_sec": 54.699212139615945, "iter": 4540, 
"memory": 16133, "step": 4540} +{"base_lr": 1.1018369088920448e-05, "lr": 1.1018369088920448e-05, "data_time": 0.008825063705444336, "loss": 1.1953125, "time": 1.003819227218628, "tflops": 3.975031877255497, "tokens_per_sec": 65.74889004946276, "iter": 4550, "memory": 16133, "step": 4550} +{"base_lr": 1.0982629005106262e-05, "lr": 1.0982629005106262e-05, "data_time": 0.008906126022338867, "loss": 1.65625, "time": 0.9924688339233398, "tflops": 4.87423012649361, "tokens_per_sec": 80.60706519485402, "iter": 4560, "memory": 16133, "step": 4560} +{"base_lr": 1.0946876242732608e-05, "lr": 1.0946876242732608e-05, "data_time": 0.008928060531616211, "loss": 0.1259765625, "time": 1.3468871116638184, "tflops": 2.4235140597620606, "tokens_per_sec": 40.092446896498515, "iter": 4570, "memory": 16133, "step": 4570} +{"base_lr": 1.0911111263106378e-05, "lr": 1.0911111263106378e-05, "data_time": 0.00874018669128418, "loss": 0.00775146484375, "time": 0.9942820072174072, "tflops": 4.013160650832835, "tokens_per_sec": 66.37955783253175, "iter": 4580, "memory": 16133, "step": 4580} +{"base_lr": 1.0875334527692089e-05, "lr": 1.0875334527692089e-05, "data_time": 0.009366750717163086, "loss": 0.045654296875, "time": 0.9909460544586182, "tflops": 3.782429119288392, "tokens_per_sec": 62.56647344320856, "iter": 4590, "memory": 16133, "step": 4590} +{"base_lr": 1.0839546498105935e-05, "lr": 1.0839546498105935e-05, "data_time": 0.008839845657348633, "loss": 1.5, "time": 0.9813961982727051, "tflops": 6.286596004350249, "tokens_per_sec": 103.9335593304926, "iter": 4600, "memory": 16133, "step": 4600} diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..383eb9555c989a97174eb3e77544c078e8983253 --- /dev/null +++ 
b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a28678eb3dc6429dcabf65d7724b98cc43f6225d47b10e57d31cf5dce33a0ec +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22f1f73a920125eed516224b92616addfff67fd9 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec4d8aa103c7b7b58786013531cd939ddb49125e28a90ee791cff3c115dd4cb +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba947a72de2380960a5b64d6fc77876947f6951d --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ff35fa2d294789df71fb685891df93ef00350afaabdd1ead1ff8b2550f2e1e +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..37895f7c299f59e88e81921c7523b628cc3dab82 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:478c62f01e77c8cd856001abf0ffddb4b6ffed8b342c5e6780da31797b9b27dc +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b92a62bffd710f454dda6333b2cd3ecf0609b40 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d35ac2f6cce58392b20ca9d8ce85f6d2d7a9992851916ad543ee252c9632648a +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a02b9a6d3604b8f542c0dc497acd7f446663fd7e --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd90ff6ad347780f3295a3d20b8a7021d92a0c16f3297ee038fdb122fd98c3d +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22f4f411729b2f982afcafa05bd2dce39226dc09 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21da77a582762da9cf57e5de6421f48157dfcb190a44db580ace941aea1c7dc4 +size 612302570 diff --git 
a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bea42410bc3a1b2fc1e46c808f8bab181ab9c49c --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6253fcfdf1047ca70c85c5c59ecc46f19971081742bc3d15727a7f18eb80d4 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/mp_rank_00_model_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6202535a4cabfbbe0c33194fb89a7680e7682ade --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4096.pth/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cec4603b1782bace539ad6dcfc1a5a70cf59177b193fa4b8bb850ed304003f7 +size 816345224 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6400ca1d73ca4ac59474f625b206afd1f0492659 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59445f3060bcf0b356c4db24c03801cdf3a495d7f8e7876e3ff19fb4eefad7df +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt 
b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9cba50f60fa144e762a734888f69e5e668e2a59 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11b09176a772f17783f976b80df5b65fc844be287a16049b4ddb16fb0070a68 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de22540559617b0b0d4921f18b0ac8f4fd9ca20e --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9360d83af5a87c9e82d2c5a7bec96675f95172b647e79d2c64caf1f7e3a2c632 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e6b014e956aacb2d141d8d31de57237669758be --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6decd45e95c679ff940c5ee7676e06e7fc313d589c7bd65af1395df1562be8 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..516f7850b9c002ec28ae8df523ba296e5719733f --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02610aa01d72c8d5966b55db7a809573d33195c8eee3ac93a67dfd0bda57b0a +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c1d40f79df436ad2fd813eb97f7f3cab823ee8e --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e651db681814b6e7beb541136a6a3273d60006d2ef984757aed1181831cf6a +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0191d745b282ffd13d4839a7981a0f9fe7cacc33 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b1a6c920baaf3c8c98e30d594957c76f0437b461f2c181234e95f2aed5ead9 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ef06ba0d36421dbaad22cc5544874fe39a626cb --- /dev/null +++ 
b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452055e852d1d0d831b6ddc97a19768823cd8a2f5037bc36d9850374ad4e9ad1 +size 612302570 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/mp_rank_00_model_states.pt b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..69efa1a9d5015418b2bb8e580ae350c227c76385 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/iter_4603.pth/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79cbebdc1508c846a823fb81718f0aa08b9d472b84caaf59e40f4ce74d16f93b +size 816412040 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/last_checkpoint b/stage_2/multi_stage2_run_stage1_both/stage2b/last_checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..3dc2b390ea9b3db2b7cfab4f339a34ba45984a16 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/last_checkpoint @@ -0,0 +1 @@ +/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b/iter_4603.pth \ No newline at end of file diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/temp_config_stage_2b.py b/stage_2/multi_stage2_run_stage1_both/stage2b/temp_config_stage_2b.py new file mode 100644 index 0000000000000000000000000000000000000000..b2393a84fb0d3988d229c7a3fcbf3d579f748c00 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/temp_config_stage_2b.py @@ -0,0 +1,261 @@ +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + 
load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2a/iter_7452.pth' +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + 
pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-05, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] 
+per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' +save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2b_medium.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + 
type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), + num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b' diff --git a/stage_2/multi_stage2_run_stage1_both/stage2b/zero_to_fp32.py b/stage_2/multi_stage2_run_stage1_both/stage2b/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2b/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/20250926_014218.log b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/20250926_014218.log new file mode 100644 index 0000000000000000000000000000000000000000..87c6c1da6f88b40061b45d590b90801957213af7 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/20250926_014218.log @@ -0,0 +1,1160 @@ +2025/09/26 01:42:18 - mmengine - INFO - +------------------------------------------------------------ +System environment: + sys.platform: linux + Python: 3.11.2 (main, May 2 2024, 11:59:08) [GCC 12.2.0] + CUDA available: True + MUSA available: False + numpy_random_seed: 2102626718 + GPU 0,1,2,3,4,5,6,7: NVIDIA H100 80GB HBM3 + CUDA_HOME: /usr/local/cuda + NVCC: Cuda compilation tools, release 12.4, V12.4.131 + GCC: x86_64-linux-gnu-gcc (Debian 12.2.0-14) 12.2.0 + PyTorch: 2.4.1 + PyTorch compiling details: PyTorch built with: + - GCC 12.2 + - C++ Version: 201703 + - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications + - Intel(R) MKL-DNN v3.4.2 (Git Hash 1137e04ec0b5251ca2b4400a4fd3c667ce843d67) + - OpenMP 201511 (a.k.a. 
OpenMP 4.5) + - LAPACK is enabled (usually provided by MKL) + - NNPACK is enabled + - CPU capability usage: AVX512 + - CUDA Runtime 12.4 + - NVCC architecture flags: -gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90 + - CuDNN 90.4 (built against CUDA 12.6) + - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.4, CUDNN_VERSION=9.4.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0 -Wno-uninitialized -Wno-maybe-uninitialized -Wno-nonnull -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.4.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=ON, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, + + TorchVision: 0.19.1+cu124 + OpenCV: 4.10.0 + MMEngine: 0.10.7 + +Runtime environment: + launcher: pytorch + randomness: {'deterministic': False, 'seed': None} + cudnn_benchmark: False + dist_cfg: {'backend': 'nccl'} + mp_cfg: 
{'mp_start_method': 'fork', 'opencv_num_threads': 0} + deterministic: False + seed: None + Distributed launcher: pytorch + Distributed training: True + GPU number: 8 +------------------------------------------------------------ + +2025/09/26 01:42:20 - mmengine - INFO - Config: +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b/iter_4603.pth' +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + 
pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-05, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] 
+per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' +save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2c_hard.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + 
type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), + num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2c' + +2025/09/26 01:42:21 - mmengine - WARNING - Failed to search registry with scope "mmengine" in the "builder" registry tree. As a workaround, the current "builder" registry in "xtuner" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "mmengine" is a correct scope, or whether the registry is initialized. +2025/09/26 01:42:22 - mmengine - INFO - Hooks will be executed in the following order: +before_run: +(VERY_HIGH ) RuntimeInfoHook +(55 ) ThroughputHook +(BELOW_NORMAL) LoggerHook + -------------------- +before_train: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(NORMAL ) DatasetInfoHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +before_train_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(NORMAL ) DistSamplerSeedHook + -------------------- +before_train_iter: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook + -------------------- +after_train_iter: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(55 ) ThroughputHook +(BELOW_NORMAL) LoggerHook +(LOW ) ParamSchedulerHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +after_train_epoch: +(NORMAL ) IterTimerHook +(LOW ) ParamSchedulerHook +(VERY_LOW ) CheckpointHook + -------------------- +before_val: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) DatasetInfoHook + 
-------------------- +before_val_epoch: +(NORMAL ) IterTimerHook + -------------------- +before_val_iter: +(NORMAL ) IterTimerHook + -------------------- +after_val_iter: +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_val_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook +(LOW ) ParamSchedulerHook +(VERY_LOW ) CheckpointHook + -------------------- +after_val: +(VERY_HIGH ) RuntimeInfoHook +(LOW ) EvaluateChatHook + -------------------- +after_train: +(VERY_HIGH ) RuntimeInfoHook +(LOW ) EvaluateChatHook +(VERY_LOW ) CheckpointHook + -------------------- +before_test: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) DatasetInfoHook + -------------------- +before_test_epoch: +(NORMAL ) IterTimerHook + -------------------- +before_test_iter: +(NORMAL ) IterTimerHook + -------------------- +after_test_iter: +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_test_epoch: +(VERY_HIGH ) RuntimeInfoHook +(NORMAL ) IterTimerHook +(BELOW_NORMAL) LoggerHook + -------------------- +after_test: +(VERY_HIGH ) RuntimeInfoHook + -------------------- +after_run: +(BELOW_NORMAL) LoggerHook + -------------------- +2025/09/26 01:42:23 - mmengine - INFO - Loading unwanted prefixes from: /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv +2025/09/26 01:42:23 - mmengine - INFO - Loaded 210 prefixes to filter out. +2025/09/26 01:42:23 - mmengine - INFO - Filtered out 504 samples. +2025/09/26 01:42:23 - mmengine - INFO - [DEBUG] dataset full size used. +2025/09/26 01:42:23 - mmengine - INFO - xtuner_dataset_timeout = 1:00:00 +2025/09/26 01:43:05 - mmengine - WARNING - Dataset LLaVADataset has no metainfo. ``dataset_meta`` in visualizer will be None. 
+2025/09/26 01:43:06 - mmengine - INFO - train_stage == 2 +2025/09/26 01:43:20 - mmengine - INFO - using simple Resampler with 100 latents +2025/09/26 01:43:53 - mmengine - INFO - enable projector input require grads +2025/09/26 01:43:53 - mmengine - INFO - enable input required grads for projector +2025/09/26 01:43:53 - mmengine - INFO - Building lora +2025/09/26 01:43:56 - mmengine - INFO - loading token_merge from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors +2025/09/26 01:43:56 - mmengine - INFO - Loading projector from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors +2025/09/26 01:43:57 - mmengine - INFO - Loading resampler from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.embed_tokens.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - 
mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.0.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - 
mmengine - WARNING - llm.base_model.model.model.layers.1.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.1.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - 
mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.2.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - 
mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.3.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine 
- WARNING - llm.base_model.model.model.layers.4.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.4.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - 
WARNING - llm.base_model.model.model.layers.5.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.5.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.6.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - 
llm.base_model.model.model.layers.6.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.7.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - 
llm.base_model.model.model.layers.8.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.8.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - 
llm.base_model.model.model.layers.9.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:05 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.9.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - 
llm.base_model.model.model.layers.10.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.10.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - 
WARNING - llm.base_model.model.model.layers.11.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.11.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - 
mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.12.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 
01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.13.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False 
+2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.14.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.self_attn.o_proj.base_layer.weight is skipped since its 
requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.15.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.gate_proj.base_layer.weight is skipped 
since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.16.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.up_proj.base_layer.weight is 
skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.17.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - 
llm.base_model.model.model.layers.18.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.18.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - 
WARNING - llm.base_model.model.model.layers.19.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.19.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.20.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - 
WARNING - llm.base_model.model.model.layers.20.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.21.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - 
mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.22.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 
01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.23.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False 
+2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.24.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.k_proj.base_layer.weight is skipped since its 
requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.25.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.k_proj.base_layer.bias is skipped 
since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.v_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.26.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.q_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.q_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.k_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.k_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.v_proj.base_layer.weight 
is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.v_proj.base_layer.bias is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.self_attn.o_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.gate_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.up_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.mlp.down_proj.base_layer.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.input_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.layers.27.post_attention_layernorm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.model.norm.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - WARNING - llm.base_model.model.lm_head.weight is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- token_merge.ln_in.weight:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- token_merge.ln_in.bias:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- token_merge.ln_out.weight:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- token_merge.ln_out.bias:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- projector.model.0.bias:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- projector.model.2.bias:weight_decay=0.0 +2025/09/26 01:44:06 - 
mmengine - WARNING - resampler.query_pos_embed is skipped since its requires_grad=False +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.attn.out_proj.bias:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.ln_q.weight:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.ln_q.bias:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.ln_kv.weight:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.ln_kv.bias:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.ln_post.weight:weight_decay=0.0 +2025/09/26 01:44:06 - mmengine - INFO - paramwise_options -- resampler.ln_post.bias:weight_decay=0.0 +2025/09/26 01:44:21 - mmengine - INFO - Load checkpoint from /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b/iter_4603.pth +2025/09/26 01:44:23 - mmengine - INFO - Num train samples 19805 +2025/09/26 01:44:23 - mmengine - INFO - train example: +2025/09/26 01:44:23 - mmengine - INFO - <|im_start|>user + +Briefly outline the characteristics and diagnostic findings of the whole slide histopathology image.<|im_end|> +<|im_start|>assistant +Microscopic observation of the pathology slide reveals disorganized sheets and nests of malignant squamous cells with extensive keratinization in focus, significant nuclear pleomorphism, and numerous mitotic figures, indicating a poorly differentiated state. Vascular invasion is not observed. There is notable necrosis throughout the tumor cell islands, and no calcification is identified. Based on these observations, the diagnosis is poorly differentiated keratinizing squamous cell carcinoma.<|im_end|> + +2025/09/26 01:44:23 - mmengine - INFO - before_train in EvaluateChatHook. 
+2025/09/26 01:44:23 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 01:44:27 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes, the tumor cells are organized in a lobulated pattern, which is a characteristic feature of papillary thyroid carcinoma. This pattern is evident in the slide, showcasing the distinct lobular formations.<|im_end|> + +2025/09/26 01:44:27 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 01:44:31 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant architectural and nuclear atypia. The tumor is characterized by glandular structures with irregular shapes and poorly defined borders, reflecting a high degree of architectural deviation. The nuclear features are marked + +2025/09/26 01:44:31 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 01:44:32 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 01:44:32 - mmengine - INFO - [check] projector params NOT in optimizer: ['model.0.weight', 'model.0.bias', 'model.2.weight', 'model.2.bias'] +2025/09/26 01:44:32 - mmengine - INFO - [Trainable] 407,778,304 / 4,761,108,992 params (8.56%) +2025/09/26 01:44:32 - mmengine - INFO - llm.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight 
shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.1.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.2.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.3.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.4.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.4.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.up_proj.lora_B.default.weight 
shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.5.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.6.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.7.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.8.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.9.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.10.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.10.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.11.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.11.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.12.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 
+llm.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.gate_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.gate_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.up_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.up_proj.lora_B.default.weight shape=(18944, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.down_proj.lora_A.default.weight shape=(128, 18944) dtype=torch.bfloat16 +llm.base_model.model.model.layers.13.mlp.down_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight shape=(3584, 128) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight shape=(128, 3584) dtype=torch.bfloat16 +llm.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight shape=(512, 128) dtype=torch.bfloat16 +2025/09/26 01:44:32 - mmengine - INFO - ... 
(212 more trainable tensors not shown) +2025/09/26 01:44:32 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io +2025/09/26 01:44:32 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. +2025/09/26 01:44:32 - mmengine - INFO - Checkpoints will be saved to /mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2c. +2025/09/26 01:44:42 - mmengine - INFO - Iter(train) [ 10/2476] base_lr: 9.2439e-07 lr: 9.2439e-07 eta: 0:43:07 time: 1.0141 data_time: 0.0081 memory: 15745 loss: 2.0625 tflops: 9.1918 tokens_per_sec: 151.8592 +2025/09/26 01:44:52 - mmengine - INFO - Iter(train) [ 20/2476] base_lr: 1.7293e-06 lr: 1.7293e-06 eta: 0:41:54 time: 1.0192 data_time: 0.0082 memory: 15746 loss: 2.0469 tflops: 3.7965 tokens_per_sec: 62.7973 +2025/09/26 01:45:02 - mmengine - INFO - Iter(train) [ 30/2476] base_lr: 2.5341e-06 lr: 2.5341e-06 eta: 0:41:22 time: 1.0137 data_time: 0.0082 memory: 15746 loss: 1.4844 tflops: 8.3577 tokens_per_sec: 138.1038 +2025/09/26 01:45:13 - mmengine - INFO - Iter(train) [ 40/2476] base_lr: 3.3390e-06 lr: 3.3390e-06 eta: 0:42:04 time: 1.0035 data_time: 0.0081 memory: 15745 loss: 0.9258 tflops: 6.2689 tokens_per_sec: 103.6384 +2025/09/26 01:45:23 - mmengine - INFO - Iter(train) [ 50/2476] base_lr: 4.1439e-06 lr: 4.1439e-06 eta: 0:41:37 time: 0.9895 data_time: 0.0082 memory: 15745 loss: 1.0391 tflops: 7.8269 tokens_per_sec: 129.3538 +2025/09/26 01:45:33 - mmengine - INFO - Iter(train) [ 60/2476] base_lr: 4.9488e-06 lr: 4.9488e-06 eta: 0:41:29 time: 1.0207 data_time: 0.0088 memory: 15745 loss: 2.0000 tflops: 9.2508 tokens_per_sec: 152.8295 +2025/09/26 01:45:44 - mmengine - INFO - Iter(train) [ 70/2476] base_lr: 5.7537e-06 lr: 5.7537e-06 eta: 0:41:44 time: 0.9985 data_time: 0.0084 memory: 16136 loss: 
1.6953 tflops: 7.7563 tokens_per_sec: 128.1873 +2025/09/26 01:45:54 - mmengine - INFO - Iter(train) [ 80/2476] base_lr: 6.5585e-06 lr: 6.5585e-06 eta: 0:41:21 time: 0.9834 data_time: 0.0089 memory: 16136 loss: 0.2871 tflops: 3.4422 tokens_per_sec: 56.9426 +2025/09/26 01:46:05 - mmengine - INFO - Iter(train) [ 90/2476] base_lr: 7.3634e-06 lr: 7.3634e-06 eta: 0:41:25 time: 1.1789 data_time: 0.0082 memory: 16135 loss: 0.2852 tflops: 2.9229 tokens_per_sec: 48.3518 +2025/09/26 01:46:16 - mmengine - INFO - Iter(train) [ 100/2476] base_lr: 8.1683e-06 lr: 8.1683e-06 eta: 0:41:21 time: 0.9815 data_time: 0.0082 memory: 16136 loss: 2.1719 tflops: 9.6203 tokens_per_sec: 158.9349 +2025/09/26 01:46:26 - mmengine - INFO - Iter(train) [ 110/2476] base_lr: 8.9732e-06 lr: 8.9732e-06 eta: 0:41:03 time: 0.9913 data_time: 0.0080 memory: 16136 loss: 2.4531 tflops: 11.9745 tokens_per_sec: 197.7226 +2025/09/26 01:46:37 - mmengine - INFO - Iter(train) [ 120/2476] base_lr: 9.7780e-06 lr: 9.7780e-06 eta: 0:40:57 time: 0.9812 data_time: 0.0085 memory: 16136 loss: 1.4453 tflops: 6.3498 tokens_per_sec: 104.9765 +2025/09/26 01:46:48 - mmengine - INFO - Iter(train) [ 130/2476] base_lr: 1.0583e-05 lr: 1.0583e-05 eta: 0:40:54 time: 1.0091 data_time: 0.0085 memory: 16138 loss: 0.0046 tflops: 4.9737 tokens_per_sec: 82.2487 +2025/09/26 01:46:58 - mmengine - INFO - Iter(train) [ 140/2476] base_lr: 1.1388e-05 lr: 1.1388e-05 eta: 0:40:39 time: 0.9934 data_time: 0.0087 memory: 16136 loss: 1.9609 tflops: 12.0718 tokens_per_sec: 199.3246 +2025/09/26 01:47:09 - mmengine - INFO - Iter(train) [ 150/2476] base_lr: 1.2193e-05 lr: 1.2193e-05 eta: 0:40:36 time: 0.9956 data_time: 0.0089 memory: 16136 loss: 1.7031 tflops: 10.6426 tokens_per_sec: 175.7797 +2025/09/26 01:47:20 - mmengine - INFO - Iter(train) [ 160/2476] base_lr: 1.2998e-05 lr: 1.2998e-05 eta: 0:40:32 time: 0.9964 data_time: 0.0084 memory: 16135 loss: 0.4648 tflops: 3.5794 tokens_per_sec: 59.2110 +2025/09/26 01:47:30 - mmengine - INFO - Iter(train) [ 
170/2476] base_lr: 1.3802e-05 lr: 1.3802e-05 eta: 0:40:15 time: 0.9921 data_time: 0.0083 memory: 16136 loss: 1.4844 tflops: 12.1479 tokens_per_sec: 200.5775 +2025/09/26 01:47:41 - mmengine - INFO - Iter(train) [ 180/2476] base_lr: 1.4607e-05 lr: 1.4607e-05 eta: 0:40:12 time: 1.3137 data_time: 0.0079 memory: 16135 loss: 0.4297 tflops: 3.0834 tokens_per_sec: 51.0006 +2025/09/26 01:47:51 - mmengine - INFO - Iter(train) [ 190/2476] base_lr: 1.5412e-05 lr: 1.5412e-05 eta: 0:39:57 time: 0.9913 data_time: 0.0082 memory: 16135 loss: 1.5000 tflops: 14.2415 tokens_per_sec: 235.0394 +2025/09/26 01:48:01 - mmengine - INFO - Iter(train) [ 200/2476] base_lr: 1.6217e-05 lr: 1.6217e-05 eta: 0:39:45 time: 1.0161 data_time: 0.0085 memory: 16136 loss: 1.6406 tflops: 7.5032 tokens_per_sec: 124.0079 +2025/09/26 01:48:12 - mmengine - INFO - Iter(train) [ 210/2476] base_lr: 1.7022e-05 lr: 1.7022e-05 eta: 0:39:38 time: 0.9934 data_time: 0.0080 memory: 16136 loss: 0.0762 tflops: 5.1133 tokens_per_sec: 84.5565 +2025/09/26 01:48:23 - mmengine - INFO - Iter(train) [ 220/2476] base_lr: 1.7827e-05 lr: 1.7827e-05 eta: 0:39:29 time: 1.0054 data_time: 0.0080 memory: 16136 loss: 1.7109 tflops: 11.0217 tokens_per_sec: 182.0215 +2025/09/26 01:48:33 - mmengine - INFO - Iter(train) [ 230/2476] base_lr: 1.8632e-05 lr: 1.8632e-05 eta: 0:39:13 time: 0.9970 data_time: 0.0083 memory: 16136 loss: 2.1250 tflops: 10.2010 tokens_per_sec: 168.5007 +2025/09/26 01:48:43 - mmengine - INFO - Iter(train) [ 240/2476] base_lr: 1.9437e-05 lr: 1.9437e-05 eta: 0:39:07 time: 1.2311 data_time: 0.0079 memory: 16136 loss: 7.6250 tflops: 1.8163 tokens_per_sec: 30.0546 +2025/09/26 01:48:54 - mmengine - INFO - Iter(train) [ 250/2476] base_lr: 2.0000e-05 lr: 2.0000e-05 eta: 0:38:57 time: 0.9984 data_time: 0.0082 memory: 16135 loss: 1.8359 tflops: 9.5183 tokens_per_sec: 157.2478 +2025/09/26 01:49:04 - mmengine - INFO - Iter(train) [ 260/2476] base_lr: 2.0000e-05 lr: 2.0000e-05 eta: 0:38:45 time: 0.9947 data_time: 0.0085 memory: 
16136 loss: 1.5000 tflops: 8.3344 tokens_per_sec: 137.7252 +2025/09/26 01:49:16 - mmengine - INFO - Iter(train) [ 270/2476] base_lr: 1.9999e-05 lr: 1.9999e-05 eta: 0:38:41 time: 1.0031 data_time: 0.0084 memory: 16136 loss: 1.7969 tflops: 14.8013 tokens_per_sec: 244.2402 +2025/09/26 01:49:26 - mmengine - INFO - Iter(train) [ 280/2476] base_lr: 1.9998e-05 lr: 1.9998e-05 eta: 0:38:33 time: 1.0157 data_time: 0.0082 memory: 16136 loss: 0.0508 tflops: 4.9417 tokens_per_sec: 81.7189 +2025/09/26 01:49:37 - mmengine - INFO - Iter(train) [ 290/2476] base_lr: 1.9996e-05 lr: 1.9996e-05 eta: 0:38:22 time: 1.3622 data_time: 0.0081 memory: 16136 loss: 1.9531 tflops: 6.5757 tokens_per_sec: 108.6464 +2025/09/26 01:49:48 - mmengine - INFO - Iter(train) [ 300/2476] base_lr: 1.9994e-05 lr: 1.9994e-05 eta: 0:38:12 time: 0.9971 data_time: 0.0080 memory: 16137 loss: 0.2246 tflops: 5.1552 tokens_per_sec: 85.2476 +2025/09/26 01:49:58 - mmengine - INFO - Iter(train) [ 310/2476] base_lr: 1.9991e-05 lr: 1.9991e-05 eta: 0:38:02 time: 1.0174 data_time: 0.0083 memory: 16137 loss: 1.5781 tflops: 16.5069 tokens_per_sec: 272.2684 +2025/09/26 01:50:09 - mmengine - INFO - Iter(train) [ 320/2476] base_lr: 1.9988e-05 lr: 1.9988e-05 eta: 0:37:51 time: 1.2566 data_time: 0.0081 memory: 16135 loss: 1.3359 tflops: 2.6939 tokens_per_sec: 44.5647 +2025/09/26 01:50:20 - mmengine - INFO - Iter(train) [ 330/2476] base_lr: 1.9985e-05 lr: 1.9985e-05 eta: 0:37:45 time: 0.9965 data_time: 0.0078 memory: 16136 loss: 1.3828 tflops: 11.6679 tokens_per_sec: 192.6708 +2025/09/26 01:50:30 - mmengine - INFO - Iter(train) [ 340/2476] base_lr: 1.9981e-05 lr: 1.9981e-05 eta: 0:37:34 time: 1.0249 data_time: 0.0079 memory: 16136 loss: 2.1875 tflops: 10.5754 tokens_per_sec: 174.6595 +2025/09/26 01:50:41 - mmengine - INFO - Iter(train) [ 350/2476] base_lr: 1.9977e-05 lr: 1.9977e-05 eta: 0:37:22 time: 1.2190 data_time: 0.0083 memory: 16136 loss: 0.3984 tflops: 2.8267 tokens_per_sec: 46.7605 +2025/09/26 01:50:52 - mmengine - INFO - 
Iter(train) [ 360/2476] base_lr: 1.9972e-05 lr: 1.9972e-05 eta: 0:37:15 time: 1.3009 data_time: 0.0088 memory: 16136 loss: 0.3398 tflops: 2.7417 tokens_per_sec: 45.3535 +2025/09/26 01:51:02 - mmengine - INFO - Iter(train) [ 370/2476] base_lr: 1.9967e-05 lr: 1.9967e-05 eta: 0:37:03 time: 0.9903 data_time: 0.0079 memory: 16136 loss: 1.0781 tflops: 3.7239 tokens_per_sec: 61.5985 +2025/09/26 01:51:13 - mmengine - INFO - Iter(train) [ 380/2476] base_lr: 1.9961e-05 lr: 1.9961e-05 eta: 0:36:53 time: 1.0288 data_time: 0.0086 memory: 16136 loss: 0.4844 tflops: 4.1138 tokens_per_sec: 68.0409 +2025/09/26 01:51:24 - mmengine - INFO - Iter(train) [ 390/2476] base_lr: 1.9955e-05 lr: 1.9955e-05 eta: 0:36:46 time: 1.3398 data_time: 0.0085 memory: 16136 loss: 1.3984 tflops: 7.5007 tokens_per_sec: 123.9010 +2025/09/26 01:51:34 - mmengine - INFO - Iter(train) [ 400/2476] base_lr: 1.9949e-05 lr: 1.9949e-05 eta: 0:36:34 time: 0.9872 data_time: 0.0079 memory: 16136 loss: 1.6250 tflops: 9.6877 tokens_per_sec: 160.0436 +2025/09/26 01:51:45 - mmengine - INFO - Iter(train) [ 410/2476] base_lr: 1.9942e-05 lr: 1.9942e-05 eta: 0:36:22 time: 1.0022 data_time: 0.0085 memory: 16135 loss: 1.0234 tflops: 3.5589 tokens_per_sec: 58.8718 +2025/09/26 01:51:55 - mmengine - INFO - Iter(train) [ 420/2476] base_lr: 1.9934e-05 lr: 1.9934e-05 eta: 0:36:13 time: 0.9976 data_time: 0.0083 memory: 16136 loss: 1.6094 tflops: 8.4931 tokens_per_sec: 140.3427 +2025/09/26 01:52:06 - mmengine - INFO - Iter(train) [ 430/2476] base_lr: 1.9926e-05 lr: 1.9926e-05 eta: 0:36:02 time: 0.9772 data_time: 0.0078 memory: 16135 loss: 1.1250 tflops: 12.7688 tokens_per_sec: 210.8096 +2025/09/26 01:52:17 - mmengine - INFO - Iter(train) [ 440/2476] base_lr: 1.9918e-05 lr: 1.9918e-05 eta: 0:35:52 time: 1.3345 data_time: 0.0080 memory: 16138 loss: 1.0703 tflops: 7.6216 tokens_per_sec: 125.8937 +2025/09/26 01:52:28 - mmengine - INFO - Iter(train) [ 450/2476] base_lr: 1.9909e-05 lr: 1.9909e-05 eta: 0:35:44 time: 0.9915 data_time: 0.0071 
memory: 16136 loss: 1.8281 tflops: 9.3405 tokens_per_sec: 154.3173 +2025/09/26 01:52:39 - mmengine - INFO - Iter(train) [ 460/2476] base_lr: 1.9900e-05 lr: 1.9900e-05 eta: 0:35:35 time: 1.0816 data_time: 0.0086 memory: 16136 loss: 0.0854 tflops: 4.6406 tokens_per_sec: 76.7398 +2025/09/26 01:52:50 - mmengine - INFO - Iter(train) [ 470/2476] base_lr: 1.9890e-05 lr: 1.9890e-05 eta: 0:35:28 time: 1.0961 data_time: 0.0101 memory: 16136 loss: 2.0781 tflops: 11.2169 tokens_per_sec: 185.1961 +2025/09/26 01:53:01 - mmengine - INFO - Iter(train) [ 480/2476] base_lr: 1.9880e-05 lr: 1.9880e-05 eta: 0:35:17 time: 0.9877 data_time: 0.0087 memory: 16136 loss: 1.7969 tflops: 8.1485 tokens_per_sec: 134.6598 +2025/09/26 01:53:11 - mmengine - INFO - Iter(train) [ 490/2476] base_lr: 1.9870e-05 lr: 1.9870e-05 eta: 0:35:05 time: 0.9924 data_time: 0.0085 memory: 16137 loss: 1.4062 tflops: 6.5219 tokens_per_sec: 107.8169 +2025/09/26 01:53:22 - mmengine - INFO - Iter(train) [ 500/2476] base_lr: 1.9859e-05 lr: 1.9859e-05 eta: 0:34:55 time: 0.9922 data_time: 0.0087 memory: 16136 loss: 2.1250 tflops: 9.5173 tokens_per_sec: 157.2328 +2025/09/26 01:53:32 - mmengine - INFO - Iter(train) [ 510/2476] base_lr: 1.9847e-05 lr: 1.9847e-05 eta: 0:34:45 time: 1.2703 data_time: 0.0082 memory: 16136 loss: 0.4082 tflops: 2.8076 tokens_per_sec: 46.4441 +2025/09/26 01:53:35 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/26 01:53:35 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 01:53:35 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes<|im_end|> + +2025/09/26 01:53:35 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 01:53:45 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +The pathology report indicates a poorly differentiated adenocarcinoma with significant nuclear pleomorphism and marked nuclear atypia. The tumor cells exhibit a high degree of nuclear pleomorphism, with irregular nuclear shapes and prominent nucleoli. The tumor is characterized by a high mitotic rate, with numerous mitotic figures observed. The tumor cells are arranged in sheets and nests, with areas of glandular formation. There is no evidence of necrosis or vascular invasion. The tumor is infiltrative, with a high nuclear grade. The tumor is poorly differentiated, with a high nuclear grade.<|im_end|> + +2025/09/26 01:53:45 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 01:53:46 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 01:54:32 - mmengine - INFO - Iter(train) [ 520/2476] base_lr: 1.9836e-05 lr: 1.9836e-05 eta: 0:37:37 time: 1.0039 data_time: 0.0083 memory: 16136 loss: 0.0269 tflops: 5.4819 tokens_per_sec: 90.6438 +2025/09/26 01:54:42 - mmengine - INFO - Iter(train) [ 530/2476] base_lr: 1.9823e-05 lr: 1.9823e-05 eta: 0:37:22 time: 1.0656 data_time: 0.0082 memory: 16136 loss: 1.9766 tflops: 7.2114 tokens_per_sec: 119.1833 +2025/09/26 01:54:53 - mmengine - INFO - Iter(train) [ 540/2476] base_lr: 1.9811e-05 lr: 1.9811e-05 eta: 0:37:08 time: 1.0058 data_time: 0.0082 memory: 16136 loss: 0.3320 tflops: 3.3658 tokens_per_sec: 55.6793 +2025/09/26 01:55:04 - mmengine - INFO - Iter(train) [ 550/2476] base_lr: 1.9797e-05 lr: 1.9797e-05 eta: 0:36:54 time: 0.9995 data_time: 0.0082 memory: 16136 loss: 1.9922 tflops: 12.6659 tokens_per_sec: 209.1032 +2025/09/26 01:55:15 - mmengine - INFO - Iter(train) [ 560/2476] base_lr: 1.9784e-05 lr: 1.9784e-05 eta: 0:36:40 time: 0.9975 data_time: 0.0080 memory: 16136 loss: 0.3320 tflops: 3.6363 tokens_per_sec: 60.1501 +2025/09/26 01:55:25 - mmengine - INFO - Iter(train) [ 570/2476] base_lr: 1.9770e-05 lr: 1.9770e-05 eta: 0:36:25 time: 0.9888 data_time: 0.0083 memory: 16136 loss: 1.3906 tflops: 15.1391 tokens_per_sec: 249.8067 +2025/09/26 01:55:37 - mmengine - INFO - Iter(train) [ 580/2476] base_lr: 1.9755e-05 lr: 1.9755e-05 eta: 0:36:13 time: 1.0163 data_time: 0.0088 memory: 16136 loss: 1.6719 tflops: 15.8061 tokens_per_sec: 260.7513 +2025/09/26 01:55:48 - mmengine - INFO - Iter(train) [ 590/2476] base_lr: 1.9740e-05 lr: 1.9740e-05 eta: 0:36:00 time: 1.2463 data_time: 0.0086 memory: 16136 loss: 0.2363 tflops: 1.5516 tokens_per_sec: 25.6752 +2025/09/26 01:55:58 - mmengine - INFO 
- Iter(train) [ 600/2476] base_lr: 1.9725e-05 lr: 1.9725e-05 eta: 0:35:46 time: 1.0020 data_time: 0.0083 memory: 16136 loss: 0.4199 tflops: 3.3179 tokens_per_sec: 54.8883 +2025/09/26 01:56:09 - mmengine - INFO - Iter(train) [ 610/2476] base_lr: 1.9709e-05 lr: 1.9709e-05 eta: 0:35:33 time: 1.0155 data_time: 0.0091 memory: 16135 loss: 1.6094 tflops: 8.5218 tokens_per_sec: 140.8112 +2025/09/26 01:56:20 - mmengine - INFO - Iter(train) [ 620/2476] base_lr: 1.9693e-05 lr: 1.9693e-05 eta: 0:35:19 time: 0.9779 data_time: 0.0084 memory: 16136 loss: 1.5547 tflops: 8.6643 tokens_per_sec: 143.1711 +2025/09/26 01:56:30 - mmengine - INFO - Iter(train) [ 630/2476] base_lr: 1.9676e-05 lr: 1.9676e-05 eta: 0:35:05 time: 1.3389 data_time: 0.0078 memory: 16136 loss: 1.6797 tflops: 6.6903 tokens_per_sec: 110.5409 +2025/09/26 01:56:41 - mmengine - INFO - Iter(train) [ 640/2476] base_lr: 1.9659e-05 lr: 1.9659e-05 eta: 0:34:52 time: 1.2412 data_time: 0.0084 memory: 16136 loss: 1.3359 tflops: 10.3957 tokens_per_sec: 171.6147 +2025/09/26 01:56:51 - mmengine - INFO - Iter(train) [ 650/2476] base_lr: 1.9642e-05 lr: 1.9642e-05 eta: 0:34:37 time: 1.0145 data_time: 0.0087 memory: 16136 loss: 1.7031 tflops: 10.0250 tokens_per_sec: 165.5945 +2025/09/26 01:57:02 - mmengine - INFO - Iter(train) [ 660/2476] base_lr: 1.9624e-05 lr: 1.9624e-05 eta: 0:34:25 time: 1.2344 data_time: 0.0082 memory: 16136 loss: 1.5938 tflops: 6.6182 tokens_per_sec: 109.3683 +2025/09/26 01:57:12 - mmengine - INFO - Iter(train) [ 670/2476] base_lr: 1.9606e-05 lr: 1.9606e-05 eta: 0:34:10 time: 1.2947 data_time: 0.0085 memory: 16136 loss: 1.2031 tflops: 6.8250 tokens_per_sec: 112.7696 +2025/09/26 01:57:23 - mmengine - INFO - Iter(train) [ 680/2476] base_lr: 1.9587e-05 lr: 1.9587e-05 eta: 0:33:57 time: 1.1922 data_time: 0.0080 memory: 16135 loss: 1.4531 tflops: 7.0046 tokens_per_sec: 115.7482 +2025/09/26 01:57:34 - mmengine - INFO - Iter(train) [ 690/2476] base_lr: 1.9568e-05 lr: 1.9568e-05 eta: 0:33:44 time: 0.9767 data_time: 
0.0079 memory: 16135 loss: 0.0047 tflops: 5.4489 tokens_per_sec: 90.1006 +2025/09/26 01:57:44 - mmengine - INFO - Iter(train) [ 700/2476] base_lr: 1.9548e-05 lr: 1.9548e-05 eta: 0:33:30 time: 0.9875 data_time: 0.0089 memory: 16136 loss: 0.2285 tflops: 3.7955 tokens_per_sec: 62.7829 +2025/09/26 01:57:55 - mmengine - INFO - Iter(train) [ 710/2476] base_lr: 1.9528e-05 lr: 1.9528e-05 eta: 0:33:18 time: 0.9888 data_time: 0.0084 memory: 16136 loss: 1.3203 tflops: 7.5878 tokens_per_sec: 125.4100 +2025/09/26 01:58:06 - mmengine - INFO - Iter(train) [ 720/2476] base_lr: 1.9507e-05 lr: 1.9507e-05 eta: 0:33:05 time: 1.3350 data_time: 0.0079 memory: 16136 loss: 0.0850 tflops: 3.4877 tokens_per_sec: 57.6798 +2025/09/26 01:58:16 - mmengine - INFO - Iter(train) [ 730/2476] base_lr: 1.9487e-05 lr: 1.9487e-05 eta: 0:32:52 time: 1.0468 data_time: 0.0082 memory: 16136 loss: 1.9375 tflops: 6.8776 tokens_per_sec: 113.6784 +2025/09/26 01:58:27 - mmengine - INFO - Iter(train) [ 740/2476] base_lr: 1.9465e-05 lr: 1.9465e-05 eta: 0:32:39 time: 0.9809 data_time: 0.0084 memory: 16136 loss: 2.0938 tflops: 12.2253 tokens_per_sec: 201.8591 +2025/09/26 01:58:38 - mmengine - INFO - Iter(train) [ 750/2476] base_lr: 1.9443e-05 lr: 1.9443e-05 eta: 0:32:27 time: 0.9959 data_time: 0.0082 memory: 16136 loss: 0.4453 tflops: 3.8852 tokens_per_sec: 64.2642 +2025/09/26 01:58:48 - mmengine - INFO - Iter(train) [ 760/2476] base_lr: 1.9421e-05 lr: 1.9421e-05 eta: 0:32:13 time: 0.9805 data_time: 0.0082 memory: 16136 loss: 1.2031 tflops: 8.0224 tokens_per_sec: 132.5816 +2025/09/26 01:58:59 - mmengine - INFO - Iter(train) [ 770/2476] base_lr: 1.9399e-05 lr: 1.9399e-05 eta: 0:32:02 time: 0.9941 data_time: 0.0087 memory: 16136 loss: 1.4297 tflops: 11.6351 tokens_per_sec: 192.1303 +2025/09/26 01:59:10 - mmengine - INFO - Iter(train) [ 780/2476] base_lr: 1.9376e-05 lr: 1.9376e-05 eta: 0:31:50 time: 1.0156 data_time: 0.0084 memory: 16136 loss: 0.3125 tflops: 3.5121 tokens_per_sec: 58.0965 +2025/09/26 01:59:20 - 
mmengine - INFO - Iter(train) [ 790/2476] base_lr: 1.9352e-05 lr: 1.9352e-05 eta: 0:31:37 time: 1.0136 data_time: 0.0082 memory: 16136 loss: 1.4609 tflops: 9.5552 tokens_per_sec: 157.8505 +2025/09/26 01:59:31 - mmengine - INFO - Iter(train) [ 800/2476] base_lr: 1.9328e-05 lr: 1.9328e-05 eta: 0:31:24 time: 1.2165 data_time: 0.0086 memory: 16135 loss: 1.7969 tflops: 7.0146 tokens_per_sec: 115.9097 +2025/09/26 01:59:42 - mmengine - INFO - Iter(train) [ 810/2476] base_lr: 1.9304e-05 lr: 1.9304e-05 eta: 0:31:12 time: 0.9806 data_time: 0.0087 memory: 16135 loss: 0.0111 tflops: 5.2417 tokens_per_sec: 86.6781 +2025/09/26 01:59:53 - mmengine - INFO - Iter(train) [ 820/2476] base_lr: 1.9279e-05 lr: 1.9279e-05 eta: 0:31:00 time: 1.3954 data_time: 0.0083 memory: 16136 loss: 2.0938 tflops: 6.8539 tokens_per_sec: 113.2283 +2025/09/26 02:00:04 - mmengine - INFO - Iter(train) [ 830/2476] base_lr: 1.9254e-05 lr: 1.9254e-05 eta: 0:30:48 time: 1.0048 data_time: 0.0082 memory: 16135 loss: 0.1060 tflops: 3.6099 tokens_per_sec: 59.7138 +2025/09/26 02:00:14 - mmengine - INFO - Iter(train) [ 840/2476] base_lr: 1.9229e-05 lr: 1.9229e-05 eta: 0:30:36 time: 1.0093 data_time: 0.0084 memory: 16135 loss: 0.0413 tflops: 5.2726 tokens_per_sec: 87.1852 +2025/09/26 02:00:25 - mmengine - INFO - Iter(train) [ 850/2476] base_lr: 1.9203e-05 lr: 1.9203e-05 eta: 0:30:24 time: 1.3406 data_time: 0.0089 memory: 16136 loss: 1.4531 tflops: 7.3151 tokens_per_sec: 120.8409 +2025/09/26 02:00:36 - mmengine - INFO - Iter(train) [ 860/2476] base_lr: 1.9177e-05 lr: 1.9177e-05 eta: 0:30:12 time: 0.9942 data_time: 0.0087 memory: 16136 loss: 1.8203 tflops: 7.9727 tokens_per_sec: 131.7589 +2025/09/26 02:00:46 - mmengine - INFO - Iter(train) [ 870/2476] base_lr: 1.9150e-05 lr: 1.9150e-05 eta: 0:29:59 time: 1.0168 data_time: 0.0090 memory: 16137 loss: 0.2344 tflops: 4.8173 tokens_per_sec: 79.6648 +2025/09/26 02:00:57 - mmengine - INFO - Iter(train) [ 880/2476] base_lr: 1.9123e-05 lr: 1.9123e-05 eta: 0:29:47 time: 0.9913 
data_time: 0.0082 memory: 16136 loss: 0.2383 tflops: 3.9032 tokens_per_sec: 64.5617 +2025/09/26 02:01:08 - mmengine - INFO - Iter(train) [ 890/2476] base_lr: 1.9095e-05 lr: 1.9095e-05 eta: 0:29:35 time: 0.9784 data_time: 0.0088 memory: 16136 loss: 0.4219 tflops: 3.9545 tokens_per_sec: 65.4114 +2025/09/26 02:01:19 - mmengine - INFO - Iter(train) [ 900/2476] base_lr: 1.9067e-05 lr: 1.9067e-05 eta: 0:29:23 time: 1.0109 data_time: 0.0093 memory: 16138 loss: 1.7344 tflops: 11.8017 tokens_per_sec: 194.8675 +2025/09/26 02:01:30 - mmengine - INFO - Iter(train) [ 910/2476] base_lr: 1.9039e-05 lr: 1.9039e-05 eta: 0:29:12 time: 0.9966 data_time: 0.0086 memory: 16137 loss: 1.3047 tflops: 17.9495 tokens_per_sec: 295.9927 +2025/09/26 02:01:41 - mmengine - INFO - Iter(train) [ 920/2476] base_lr: 1.9010e-05 lr: 1.9010e-05 eta: 0:29:00 time: 0.9851 data_time: 0.0088 memory: 16136 loss: 1.5000 tflops: 8.8468 tokens_per_sec: 146.1785 +2025/09/26 02:01:51 - mmengine - INFO - Iter(train) [ 930/2476] base_lr: 1.8981e-05 lr: 1.8981e-05 eta: 0:28:47 time: 0.9908 data_time: 0.0081 memory: 16136 loss: 1.8047 tflops: 11.4290 tokens_per_sec: 188.7383 +2025/09/26 02:02:02 - mmengine - INFO - Iter(train) [ 940/2476] base_lr: 1.8951e-05 lr: 1.8951e-05 eta: 0:28:36 time: 1.0108 data_time: 0.0085 memory: 16136 loss: 1.6719 tflops: 8.4420 tokens_per_sec: 139.4957 +2025/09/26 02:02:12 - mmengine - INFO - Iter(train) [ 950/2476] base_lr: 1.8921e-05 lr: 1.8921e-05 eta: 0:28:23 time: 0.9877 data_time: 0.0082 memory: 16136 loss: 0.2754 tflops: 3.4886 tokens_per_sec: 57.7104 +2025/09/26 02:02:24 - mmengine - INFO - Iter(train) [ 960/2476] base_lr: 1.8891e-05 lr: 1.8891e-05 eta: 0:28:12 time: 1.6613 data_time: 0.0085 memory: 16136 loss: 0.0981 tflops: 2.9848 tokens_per_sec: 49.3593 +2025/09/26 02:02:35 - mmengine - INFO - Iter(train) [ 970/2476] base_lr: 1.8860e-05 lr: 1.8860e-05 eta: 0:28:01 time: 1.3182 data_time: 0.0086 memory: 16136 loss: 1.3359 tflops: 8.8208 tokens_per_sec: 145.6559 +2025/09/26 
02:02:45 - mmengine - INFO - Iter(train) [ 980/2476] base_lr: 1.8829e-05 lr: 1.8829e-05 eta: 0:27:49 time: 1.0201 data_time: 0.0083 memory: 16136 loss: 0.0493 tflops: 5.4545 tokens_per_sec: 90.1881 +2025/09/26 02:02:55 - mmengine - INFO - Iter(train) [ 990/2476] base_lr: 1.8798e-05 lr: 1.8798e-05 eta: 0:27:36 time: 0.9922 data_time: 0.0086 memory: 16136 loss: 1.3203 tflops: 10.6783 tokens_per_sec: 176.3688 +2025/09/26 02:03:07 - mmengine - INFO - Exp name: temp_config_stage_2c_20250926_014218 +2025/09/26 02:03:07 - mmengine - INFO - Iter(train) [1000/2476] base_lr: 1.8766e-05 lr: 1.8766e-05 eta: 0:27:26 time: 1.3549 data_time: 0.0082 memory: 16136 loss: 1.5000 tflops: 7.9095 tokens_per_sec: 130.6343 +2025/09/26 02:03:17 - mmengine - INFO - Iter(train) [1010/2476] base_lr: 1.8733e-05 lr: 1.8733e-05 eta: 0:27:14 time: 1.2369 data_time: 0.0086 memory: 16137 loss: 1.8984 tflops: 14.2169 tokens_per_sec: 234.4576 +2025/09/26 02:03:28 - mmengine - INFO - Iter(train) [1020/2476] base_lr: 1.8701e-05 lr: 1.8701e-05 eta: 0:27:02 time: 1.0062 data_time: 0.0089 memory: 16136 loss: 0.2500 tflops: 4.5673 tokens_per_sec: 75.5353 +2025/09/26 02:03:33 - mmengine - INFO - after_train_iter in EvaluateChatHook. 
+2025/09/26 02:03:33 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 02:03:33 - mmengine - INFO - Sample output: +<|im_start|>user + +Are the tumor cells organized in a lobulated pattern within the slide?<|im_end|> +<|im_start|>assistant +Yes<|im_end|> + +2025/09/26 02:03:33 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 02:04:21 - mmengine - INFO - Sample output: +<|im_start|>user + +Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.<|im_end|> +<|im_start|>assistant +Microscopic observation of the pathology slide reveals a poorly differentiated adenocarcinoma with a high degree of nuclear pleomorphism and marked mitotic activity. The tumor cells exhibit a high nuclear-to-cytoplasmic ratio, with irregular nuclear contours and prominent nucleoli. The tumor is infiltrative, with a poorly defined margin, and there is no evidence of necrosis or vascular invasion. The tumor cells are arranged in sheets and nests, with areas of glandular formation. There is no evidence of lymphovascular invasion or perineural invasion. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. 
The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. The tumor is poorly differentiated, with a high nuclear grade and a high mitotic count. 
The tumor is + +2025/09/26 02:04:21 - mmengine - INFO - evaluate feats: torch.Size([1, 10240, 512]), coords: torch.Size([1, 10240, 2]), pixel_values(after proj + resampler): torch.Size([1, 100, 3584]) +2025/09/26 02:04:22 - mmengine - INFO - Sample output: +<|im_start|>user + +Based on the observed features, what do you think is the correct histological classification of the tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma<|im_end|> +<|im_start|>assistant +C) Well-differentiated squamous cell carcinoma<|im_end|> + +2025/09/26 02:04:50 - mmengine - INFO - Iter(train) [1030/2476] base_lr: 1.8667e-05 lr: 1.8667e-05 eta: 0:28:30 time: 2.9755 data_time: 0.0084 memory: 16136 loss: 1.6953 tflops: 2.7863 tokens_per_sec: 46.0430 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/20250926_014218.json b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/20250926_014218.json new file mode 100644 index 0000000000000000000000000000000000000000..a9267c8bae7c4b0de9965793d712eead1ab6fe14 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/20250926_014218.json @@ -0,0 +1,103 @@ +{"base_lr": 9.243902439024397e-07, "lr": 9.243902439024397e-07, "data_time": 0.008118867874145508, "loss": 2.0625, "time": 1.0140972137451172, "tflops": 9.191799003961803, "tokens_per_sec": 151.859208281539, "iter": 10, "memory": 15745, "step": 10} +{"base_lr": 1.7292682926829274e-06, "lr": 1.7292682926829274e-06, "data_time": 0.008229255676269531, "loss": 2.046875, "time": 1.0191514492034912, "tflops": 3.7964868826631593, "tokens_per_sec": 62.79733993407538, "iter": 20, "memory": 15746, "step": 20} +{"base_lr": 2.5341463414634137e-06, "lr": 2.5341463414634137e-06, "data_time": 0.008212089538574219, "loss": 1.484375, "time": 1.0137302875518799, "tflops": 8.357653278972437, 
"tokens_per_sec": 138.1037951800341, "iter": 30, "memory": 15746, "step": 30} +{"base_lr": 3.339024390243901e-06, "lr": 3.339024390243901e-06, "data_time": 0.008138179779052734, "loss": 0.92578125, "time": 1.0034892559051514, "tflops": 6.268907941571752, "tokens_per_sec": 103.63837917336537, "iter": 40, "memory": 15745, "step": 40} +{"base_lr": 4.143902439024386e-06, "lr": 4.143902439024386e-06, "data_time": 0.008208990097045898, "loss": 1.0390625, "time": 0.9895339012145996, "tflops": 7.826883824658906, "tokens_per_sec": 129.35382996252835, "iter": 50, "memory": 15745, "step": 50} +{"base_lr": 4.948780487804875e-06, "lr": 4.948780487804875e-06, "data_time": 0.008820295333862305, "loss": 2.0, "time": 1.0207452774047852, "tflops": 9.250775194217402, "tokens_per_sec": 152.82950943106255, "iter": 60, "memory": 15745, "step": 60} +{"base_lr": 5.7536585365853625e-06, "lr": 5.7536585365853625e-06, "data_time": 0.008440256118774414, "loss": 1.6953125, "time": 0.9985389709472656, "tflops": 7.756299063641921, "tokens_per_sec": 128.18728534795633, "iter": 70, "memory": 16136, "step": 70} +{"base_lr": 6.5585365853658505e-06, "lr": 6.5585365853658505e-06, "data_time": 0.008870840072631836, "loss": 0.287109375, "time": 0.9834458827972412, "tflops": 3.442168112604207, "tokens_per_sec": 56.94263505446865, "iter": 80, "memory": 16136, "step": 80} +{"base_lr": 7.363414634146338e-06, "lr": 7.363414634146338e-06, "data_time": 0.00815582275390625, "loss": 0.28515625, "time": 1.1788604259490967, "tflops": 2.9228918826543606, "tokens_per_sec": 48.35178002863327, "iter": 90, "memory": 16135, "step": 90} +{"base_lr": 8.16829268292683e-06, "lr": 8.16829268292683e-06, "data_time": 0.00822758674621582, "loss": 2.171875, "time": 0.9815342426300049, "tflops": 9.620331804755848, "tokens_per_sec": 158.9348524223074, "iter": 100, "memory": 16136, "step": 100} +{"base_lr": 8.973170731707318e-06, "lr": 8.973170731707318e-06, "data_time": 0.007964611053466797, "loss": 2.453125, "time": 
0.9912879467010498, "tflops": 11.9745029830851, "tokens_per_sec": 197.72256956425142, "iter": 110, "memory": 16136, "step": 110} +{"base_lr": 9.77804878048781e-06, "lr": 9.77804878048781e-06, "data_time": 0.008516788482666016, "loss": 1.4453125, "time": 0.9811723232269287, "tflops": 6.349762056964083, "tokens_per_sec": 104.9764629123898, "iter": 120, "memory": 16136, "step": 120} +{"base_lr": 1.0582926829268297e-05, "lr": 1.0582926829268297e-05, "data_time": 0.008511066436767578, "loss": 0.004638671875, "time": 1.0091347694396973, "tflops": 4.973694890879441, "tokens_per_sec": 82.24867729609782, "iter": 130, "memory": 16138, "step": 130} +{"base_lr": 1.1387804878048789e-05, "lr": 1.1387804878048789e-05, "data_time": 0.008749008178710938, "loss": 1.9609375, "time": 0.9933547973632812, "tflops": 12.071842520309946, "tokens_per_sec": 199.32455203857018, "iter": 140, "memory": 16136, "step": 140} +{"base_lr": 1.2192682926829275e-05, "lr": 1.2192682926829275e-05, "data_time": 0.008867025375366211, "loss": 1.703125, "time": 0.9955644607543945, "tflops": 10.642630882143598, "tokens_per_sec": 175.7796776586591, "iter": 150, "memory": 16136, "step": 150} +{"base_lr": 1.2997560975609762e-05, "lr": 1.2997560975609762e-05, "data_time": 0.008375406265258789, "loss": 0.46484375, "time": 0.9964370727539062, "tflops": 3.5794306608153432, "tokens_per_sec": 59.210964358119824, "iter": 160, "memory": 16135, "step": 160} +{"base_lr": 1.3802439024390248e-05, "lr": 1.3802439024390248e-05, "data_time": 0.008290290832519531, "loss": 1.484375, "time": 0.9921352863311768, "tflops": 12.147885835638707, "tokens_per_sec": 200.5774844836764, "iter": 170, "memory": 16136, "step": 170} +{"base_lr": 1.4607317073170733e-05, "lr": 1.4607317073170733e-05, "data_time": 0.00788116455078125, "loss": 0.4296875, "time": 1.3137102127075195, "tflops": 3.0834235403563524, "tokens_per_sec": 51.000593092645516, "iter": 180, "memory": 16135, "step": 180} +{"base_lr": 1.5412195121951217e-05, "lr": 
1.5412195121951217e-05, "data_time": 0.00821065902709961, "loss": 1.5, "time": 0.9913232326507568, "tflops": 14.241470856292839, "tokens_per_sec": 235.03938203559773, "iter": 190, "memory": 16135, "step": 190} +{"base_lr": 1.6217073170731705e-05, "lr": 1.6217073170731705e-05, "data_time": 0.008462905883789062, "loss": 1.640625, "time": 1.0160644054412842, "tflops": 7.503214783753246, "tokens_per_sec": 124.00788702479275, "iter": 200, "memory": 16136, "step": 200} +{"base_lr": 1.7021951219512186e-05, "lr": 1.7021951219512186e-05, "data_time": 0.008041620254516602, "loss": 0.076171875, "time": 0.9934191703796387, "tflops": 5.113317179741633, "tokens_per_sec": 84.55645160120531, "iter": 210, "memory": 16136, "step": 210} +{"base_lr": 1.7826829268292678e-05, "lr": 1.7826829268292678e-05, "data_time": 0.008035898208618164, "loss": 1.7109375, "time": 1.005375862121582, "tflops": 11.021711485674135, "tokens_per_sec": 182.02147763289688, "iter": 220, "memory": 16136, "step": 220} +{"base_lr": 1.863170731707317e-05, "lr": 1.863170731707317e-05, "data_time": 0.008314847946166992, "loss": 2.125, "time": 0.9970285892486572, "tflops": 10.200975248092162, "tokens_per_sec": 168.50068474609466, "iter": 230, "memory": 16136, "step": 230} +{"base_lr": 1.9436585365853654e-05, "lr": 1.9436585365853654e-05, "data_time": 0.007876873016357422, "loss": 7.625, "time": 1.2310943603515625, "tflops": 1.816332265701899, "tokens_per_sec": 30.054560553265702, "iter": 240, "memory": 16136, "step": 240} +{"base_lr": 1.9999991083171344e-05, "lr": 1.9999991083171344e-05, "data_time": 0.008183002471923828, "loss": 1.8359375, "time": 0.9984240531921387, "tflops": 9.518341566590484, "tokens_per_sec": 157.2478141906597, "iter": 250, "memory": 16135, "step": 250} +{"base_lr": 1.9999678995838395e-05, "lr": 1.9999678995838395e-05, "data_time": 0.008537769317626953, "loss": 1.5, "time": 0.9947342872619629, "tflops": 8.33441135767381, "tokens_per_sec": 137.72522145281536, "iter": 260, "memory": 16136, 
"step": 260} +{"base_lr": 1.999892108297491e-05, "lr": 1.999892108297491e-05, "data_time": 0.00838327407836914, "loss": 1.796875, "time": 1.0031108856201172, "tflops": 14.801318191743723, "tokens_per_sec": 244.24019668403676, "iter": 270, "memory": 16136, "step": 270} +{"base_lr": 1.9997717378371684e-05, "lr": 1.9997717378371684e-05, "data_time": 0.008157968521118164, "loss": 0.05078125, "time": 1.0156772136688232, "tflops": 4.941657033774531, "tokens_per_sec": 81.71887572441068, "iter": 280, "memory": 16136, "step": 280} +{"base_lr": 1.9996067935694643e-05, "lr": 1.9996067935694643e-05, "data_time": 0.008051395416259766, "loss": 1.953125, "time": 1.3622171878814697, "tflops": 6.575672643288391, "tokens_per_sec": 108.64640478517383, "iter": 290, "memory": 16136, "step": 290} +{"base_lr": 1.9993972828482525e-05, "lr": 1.9993972828482525e-05, "data_time": 0.007958173751831055, "loss": 0.224609375, "time": 0.9970955848693848, "tflops": 5.155180564266755, "tokens_per_sec": 85.24759440294723, "iter": 300, "memory": 16137, "step": 300} +{"base_lr": 1.999143215014356e-05, "lr": 1.999143215014356e-05, "data_time": 0.008297204971313477, "loss": 1.578125, "time": 1.017378330230713, "tflops": 16.5068652694098, "tokens_per_sec": 272.2684293235456, "iter": 310, "memory": 16137, "step": 310} +{"base_lr": 1.9988446013951303e-05, "lr": 1.9988446013951303e-05, "data_time": 0.008132696151733398, "loss": 1.3359375, "time": 1.256601095199585, "tflops": 2.693922575087076, "tokens_per_sec": 44.564659551773666, "iter": 320, "memory": 16135, "step": 320} +{"base_lr": 1.998501455303961e-05, "lr": 1.998501455303961e-05, "data_time": 0.007825374603271484, "loss": 1.3828125, "time": 0.996518611907959, "tflops": 11.66793594431557, "tokens_per_sec": 192.67076169525768, "iter": 330, "memory": 16136, "step": 330} +{"base_lr": 1.9981137920396665e-05, "lr": 1.9981137920396665e-05, "data_time": 0.007911443710327148, "loss": 2.1875, "time": 1.0248513221740723, "tflops": 10.575369130646001, 
"tokens_per_sec": 174.65948096754462, "iter": 340, "memory": 16136, "step": 340} +{"base_lr": 1.997681628885821e-05, "lr": 1.997681628885821e-05, "data_time": 0.008276939392089844, "loss": 0.3984375, "time": 1.218977451324463, "tflops": 2.826698365950343, "tokens_per_sec": 46.76050400934052, "iter": 350, "memory": 16136, "step": 350} +{"base_lr": 1.9972049851099802e-05, "lr": 1.9972049851099802e-05, "data_time": 0.008832454681396484, "loss": 0.33984375, "time": 1.3008918762207031, "tflops": 2.7417170288979134, "tokens_per_sec": 45.353500224291494, "iter": 360, "memory": 16136, "step": 360} +{"base_lr": 1.9966838819628217e-05, "lr": 1.9966838819628217e-05, "data_time": 0.007926464080810547, "loss": 1.078125, "time": 0.9902834892272949, "tflops": 3.723862623614702, "tokens_per_sec": 61.59852270942727, "iter": 370, "memory": 16136, "step": 370} +{"base_lr": 1.9961183426772006e-05, "lr": 1.9961183426772006e-05, "data_time": 0.00855398178100586, "loss": 0.484375, "time": 1.0287930965423584, "tflops": 4.113819724004758, "tokens_per_sec": 68.04089202697119, "iter": 380, "memory": 16136, "step": 380} +{"base_lr": 1.995508392467111e-05, "lr": 1.995508392467111e-05, "data_time": 0.008545875549316406, "loss": 1.3984375, "time": 1.3397793769836426, "tflops": 7.500726063881778, "tokens_per_sec": 123.90099657572412, "iter": 390, "memory": 16136, "step": 390} +{"base_lr": 1.9948540585265638e-05, "lr": 1.9948540585265638e-05, "data_time": 0.007878780364990234, "loss": 1.625, "time": 0.9872312545776367, "tflops": 9.687698604171564, "tokens_per_sec": 160.04355541542947, "iter": 400, "memory": 16136, "step": 400} +{"base_lr": 1.994155370028372e-05, "lr": 1.994155370028372e-05, "data_time": 0.008478879928588867, "loss": 1.0234375, "time": 1.0021772384643555, "tflops": 3.55892877317159, "tokens_per_sec": 58.871822004606, "iter": 410, "memory": 16135, "step": 410} +{"base_lr": 1.993412358122854e-05, "lr": 1.993412358122854e-05, "data_time": 0.008288383483886719, "loss": 1.609375, 
"time": 0.9975581169128418, "tflops": 8.493145530178422, "tokens_per_sec": 140.342700466535, "iter": 420, "memory": 16136, "step": 420} +{"base_lr": 1.9926250559364396e-05, "lr": 1.9926250559364396e-05, "data_time": 0.007839679718017578, "loss": 1.125, "time": 0.9771850109100342, "tflops": 12.76877519540257, "tokens_per_sec": 210.80961916100742, "iter": 430, "memory": 16135, "step": 430} +{"base_lr": 1.9917934985701986e-05, "lr": 1.9917934985701986e-05, "data_time": 0.008036613464355469, "loss": 1.0703125, "time": 1.334458827972412, "tflops": 7.621564447980587, "tokens_per_sec": 125.89373046086007, "iter": 440, "memory": 16138, "step": 440} +{"base_lr": 1.9909177230982703e-05, "lr": 1.9909177230982703e-05, "data_time": 0.007050991058349609, "loss": 1.828125, "time": 0.9914636611938477, "tflops": 9.3404598310901, "tokens_per_sec": 154.31730479724726, "iter": 450, "memory": 16136, "step": 450} +{"base_lr": 1.989997768566215e-05, "lr": 1.989997768566215e-05, "data_time": 0.008552312850952148, "loss": 0.08544921875, "time": 1.0815768241882324, "tflops": 4.640565824566757, "tokens_per_sec": 76.73981001046148, "iter": 460, "memory": 16136, "step": 460} +{"base_lr": 1.9890336759892712e-05, "lr": 1.9890336759892712e-05, "data_time": 0.01013946533203125, "loss": 2.078125, "time": 1.0961353778839111, "tflops": 11.216914024890583, "tokens_per_sec": 185.19610268551517, "iter": 470, "memory": 16136, "step": 470} +{"base_lr": 1.9880254883505262e-05, "lr": 1.9880254883505262e-05, "data_time": 0.008694887161254883, "loss": 1.796875, "time": 0.9876737594604492, "tflops": 8.148478634649937, "tokens_per_sec": 134.65984969826593, "iter": 480, "memory": 16136, "step": 480} +{"base_lr": 1.9869732505990023e-05, "lr": 1.9869732505990023e-05, "data_time": 0.008504390716552734, "loss": 1.40625, "time": 0.9924230575561523, "tflops": 6.521920544776598, "tokens_per_sec": 107.81692261703424, "iter": 490, "memory": 16137, "step": 490} +{"base_lr": 1.9858770096476526e-05, "lr": 
1.9858770096476526e-05, "data_time": 0.008666276931762695, "loss": 2.125, "time": 0.992159366607666, "tflops": 9.517306805373783, "tokens_per_sec": 157.2328047793662, "iter": 500, "memory": 16136, "step": 500} +{"base_lr": 1.9847368143712633e-05, "lr": 1.9847368143712633e-05, "data_time": 0.008151054382324219, "loss": 0.408203125, "time": 1.270345687866211, "tflops": 2.807643182368817, "tokens_per_sec": 46.444051066962224, "iter": 510, "memory": 16136, "step": 510} +{"base_lr": 1.983552715604285e-05, "lr": 1.983552715604285e-05, "data_time": 0.008281230926513672, "loss": 0.02685546875, "time": 1.003929615020752, "tflops": 5.48194239043417, "tokens_per_sec": 90.64380474325216, "iter": 520, "memory": 16136, "step": 520} +{"base_lr": 1.982324766138556e-05, "lr": 1.982324766138556e-05, "data_time": 0.008232831954956055, "loss": 1.9765625, "time": 1.0655851364135742, "tflops": 7.211396923016332, "tokens_per_sec": 119.18334411769577, "iter": 530, "memory": 16136, "step": 530} +{"base_lr": 1.9810530207209583e-05, "lr": 1.9810530207209583e-05, "data_time": 0.008224725723266602, "loss": 0.33203125, "time": 1.0057597160339355, "tflops": 3.3658000059751982, "tokens_per_sec": 55.6793030255497, "iter": 540, "memory": 16136, "step": 540} +{"base_lr": 1.9797375360509683e-05, "lr": 1.9797375360509683e-05, "data_time": 0.008211851119995117, "loss": 1.9921875, "time": 0.9995062351226807, "tflops": 12.665923602793203, "tokens_per_sec": 209.10324783930736, "iter": 550, "memory": 16136, "step": 550} +{"base_lr": 1.9783783707781336e-05, "lr": 1.9783783707781336e-05, "data_time": 0.008047342300415039, "loss": 0.33203125, "time": 0.9975039958953857, "tflops": 3.636253842753295, "tokens_per_sec": 60.1501349837524, "iter": 560, "memory": 16136, "step": 560} +{"base_lr": 1.9769755854994595e-05, "lr": 1.9769755854994595e-05, "data_time": 0.00828409194946289, "loss": 1.390625, "time": 0.9887645244598389, "tflops": 15.139057467170348, "tokens_per_sec": 249.80669703404465, "iter": 570, "memory": 
16136, "step": 570} +{"base_lr": 1.9755292427566994e-05, "lr": 1.9755292427566994e-05, "data_time": 0.008766412734985352, "loss": 1.671875, "time": 1.016294240951538, "tflops": 15.806099594448245, "tokens_per_sec": 260.7512562027553, "iter": 580, "memory": 16136, "step": 580} +{"base_lr": 1.9740394070335768e-05, "lr": 1.9740394070335768e-05, "data_time": 0.008598566055297852, "loss": 0.236328125, "time": 1.2463364601135254, "tflops": 1.5515677607748921, "tokens_per_sec": 25.67524984149106, "iter": 590, "memory": 16136, "step": 590} +{"base_lr": 1.9725061447529046e-05, "lr": 1.9725061447529046e-05, "data_time": 0.00830531120300293, "loss": 0.419921875, "time": 1.0020349025726318, "tflops": 3.317940466725104, "tokens_per_sec": 54.888307641517976, "iter": 600, "memory": 16136, "step": 600} +{"base_lr": 1.9709295242736235e-05, "lr": 1.9709295242736235e-05, "data_time": 0.009053468704223633, "loss": 1.609375, "time": 1.0155441761016846, "tflops": 8.521837292975563, "tokens_per_sec": 140.81120581951015, "iter": 610, "memory": 16135, "step": 610} +{"base_lr": 1.969309615887756e-05, "lr": 1.969309615887756e-05, "data_time": 0.008394002914428711, "loss": 1.5546875, "time": 0.9778509140014648, "tflops": 8.664312872686688, "tokens_per_sec": 143.17110921026057, "iter": 620, "memory": 16136, "step": 620} +{"base_lr": 1.967646491817272e-05, "lr": 1.967646491817272e-05, "data_time": 0.007830381393432617, "loss": 1.6796875, "time": 1.3388714790344238, "tflops": 6.690331698625274, "tokens_per_sec": 110.54085647311351, "iter": 630, "memory": 16136, "step": 630} +{"base_lr": 1.9659402262108696e-05, "lr": 1.9659402262108696e-05, "data_time": 0.008354663848876953, "loss": 1.3359375, "time": 1.2411527633666992, "tflops": 10.395693682674832, "tokens_per_sec": 171.6146523511364, "iter": 640, "memory": 16136, "step": 640} +{"base_lr": 1.964190895140667e-05, "lr": 1.964190895140667e-05, "data_time": 0.008706331253051758, "loss": 1.703125, "time": 1.014526605606079, "tflops": 
10.02503424194579, "tokens_per_sec": 165.5944743799706, "iter": 650, "memory": 16136, "step": 650} +{"base_lr": 1.9623985765988126e-05, "lr": 1.9623985765988126e-05, "data_time": 0.008224010467529297, "loss": 1.59375, "time": 1.234361171722412, "tflops": 6.618223750212486, "tokens_per_sec": 109.36831382302258, "iter": 660, "memory": 16136, "step": 660} +{"base_lr": 1.960563350494008e-05, "lr": 1.960563350494008e-05, "data_time": 0.008541107177734375, "loss": 1.203125, "time": 1.2946748733520508, "tflops": 6.825043751096583, "tokens_per_sec": 112.76962502707549, "iter": 670, "memory": 16136, "step": 670} +{"base_lr": 1.958685298647946e-05, "lr": 1.958685298647946e-05, "data_time": 0.00795435905456543, "loss": 1.453125, "time": 1.1922430992126465, "tflops": 7.004570053610767, "tokens_per_sec": 115.74820612593103, "iter": 680, "memory": 16135, "step": 680} +{"base_lr": 1.95676450479166e-05, "lr": 1.95676450479166e-05, "data_time": 0.007901191711425781, "loss": 0.004730224609375, "time": 0.9766857624053955, "tflops": 5.448875192225441, "tokens_per_sec": 90.10062743536085, "iter": 690, "memory": 16135, "step": 690} +{"base_lr": 1.9548010545617934e-05, "lr": 1.9548010545617934e-05, "data_time": 0.008874177932739258, "loss": 0.228515625, "time": 0.9875307083129883, "tflops": 3.7955105400532565, "tokens_per_sec": 62.782857766370256, "iter": 700, "memory": 16136, "step": 700} +{"base_lr": 1.9527950354967795e-05, "lr": 1.9527950354967795e-05, "data_time": 0.008386850357055664, "loss": 1.3203125, "time": 0.9887571334838867, "tflops": 7.587847631530314, "tokens_per_sec": 125.40996752454313, "iter": 710, "memory": 16136, "step": 710} +{"base_lr": 1.9507465370329408e-05, "lr": 1.9507465370329408e-05, "data_time": 0.007892131805419922, "loss": 0.0849609375, "time": 1.3349566459655762, "tflops": 3.487700516432031, "tokens_per_sec": 57.6797757684843, "iter": 720, "memory": 16136, "step": 720} +{"base_lr": 1.9486556505004994e-05, "lr": 1.9486556505004994e-05, "data_time": 
0.00815725326538086, "loss": 1.9375, "time": 1.0468125343322754, "tflops": 6.877582039937506, "tokens_per_sec": 113.67842483448308, "iter": 730, "memory": 16136, "step": 730} +{"base_lr": 1.946522469119509e-05, "lr": 1.946522469119509e-05, "data_time": 0.00836944580078125, "loss": 2.09375, "time": 0.9808824062347412, "tflops": 12.225341798713009, "tokens_per_sec": 201.85905949710093, "iter": 740, "memory": 16136, "step": 740} +{"base_lr": 1.9443470879956948e-05, "lr": 1.9443470879956948e-05, "data_time": 0.008220911026000977, "loss": 0.4453125, "time": 0.9958882331848145, "tflops": 3.8851700215139298, "tokens_per_sec": 64.2642395675929, "iter": 750, "memory": 16136, "step": 750} +{"base_lr": 1.942129604116213e-05, "lr": 1.942129604116213e-05, "data_time": 0.008209943771362305, "loss": 1.203125, "time": 0.9805278778076172, "tflops": 8.022403955204373, "tokens_per_sec": 132.58164601147, "iter": 760, "memory": 16136, "step": 760} +{"base_lr": 1.9398701163453314e-05, "lr": 1.9398701163453314e-05, "data_time": 0.008694887161254883, "loss": 1.4296875, "time": 0.9941167831420898, "tflops": 11.635054608604412, "tokens_per_sec": 192.1303444813768, "iter": 770, "memory": 16136, "step": 770} +{"base_lr": 1.9375687254200167e-05, "lr": 1.9375687254200167e-05, "data_time": 0.008378744125366211, "loss": 0.3125, "time": 1.015552282333374, "tflops": 3.5120569091662572, "tokens_per_sec": 58.096467337339945, "iter": 780, "memory": 16136, "step": 780} +{"base_lr": 1.9352255339454455e-05, "lr": 1.9352255339454455e-05, "data_time": 0.00817108154296875, "loss": 1.4609375, "time": 1.0136175155639648, "tflops": 9.555200927286311, "tokens_per_sec": 157.85046878439152, "iter": 790, "memory": 16136, "step": 790} +{"base_lr": 1.932840646390429e-05, "lr": 1.932840646390429e-05, "data_time": 0.008602142333984375, "loss": 1.796875, "time": 1.2164642810821533, "tflops": 7.014621620292943, "tokens_per_sec": 115.90969187722636, "iter": 800, "memory": 16135, "step": 800} +{"base_lr": 
1.9304141690827576e-05, "lr": 1.9304141690827576e-05, "data_time": 0.008701324462890625, "loss": 0.0111083984375, "time": 0.9806404113769531, "tflops": 5.241684638120516, "tokens_per_sec": 86.67805141801337, "iter": 810, "memory": 16135, "step": 810} +{"base_lr": 1.9279462102044556e-05, "lr": 1.9279462102044556e-05, "data_time": 0.008332252502441406, "loss": 2.09375, "time": 1.3954112529754639, "tflops": 6.853892590141893, "tokens_per_sec": 113.22826848570996, "iter": 820, "memory": 16136, "step": 820} +{"base_lr": 1.9254368797869636e-05, "lr": 1.9254368797869636e-05, "data_time": 0.008191585540771484, "loss": 0.10595703125, "time": 1.004793405532837, "tflops": 3.6098741475247995, "tokens_per_sec": 59.71376769548222, "iter": 830, "memory": 16135, "step": 830} +{"base_lr": 1.9228862897062295e-05, "lr": 1.9228862897062295e-05, "data_time": 0.008447408676147461, "loss": 0.041259765625, "time": 1.009345531463623, "tflops": 5.272563909460897, "tokens_per_sec": 87.18520789635491, "iter": 840, "memory": 16135, "step": 840} +{"base_lr": 1.92029455367772e-05, "lr": 1.92029455367772e-05, "data_time": 0.008902549743652344, "loss": 1.453125, "time": 1.3406057357788086, "tflops": 7.315085534172829, "tokens_per_sec": 120.84089727228208, "iter": 850, "memory": 16136, "step": 850} +{"base_lr": 1.917661787251355e-05, "lr": 1.917661787251355e-05, "data_time": 0.008685588836669922, "loss": 1.8203125, "time": 0.9942400455474854, "tflops": 7.972727601277693, "tokens_per_sec": 131.758925408936, "iter": 860, "memory": 16136, "step": 860} +{"base_lr": 1.91498810780635e-05, "lr": 1.91498810780635e-05, "data_time": 0.009018659591674805, "loss": 0.234375, "time": 1.0167601108551025, "tflops": 4.817316773487389, "tokens_per_sec": 79.66480897032707, "iter": 870, "memory": 16137, "step": 870} +{"base_lr": 1.9122736345459878e-05, "lr": 1.9122736345459878e-05, "data_time": 0.008199453353881836, "loss": 0.23828125, "time": 0.9912996292114258, "tflops": 3.9031539953525693, "tokens_per_sec": 
64.56171082283885, "iter": 880, "memory": 16136, "step": 880} +{"base_lr": 1.9095184884923014e-05, "lr": 1.9095184884923014e-05, "data_time": 0.008847951889038086, "loss": 0.421875, "time": 0.9784226417541504, "tflops": 3.9545232737165783, "tokens_per_sec": 65.41140532601857, "iter": 890, "memory": 16136, "step": 890} +{"base_lr": 1.9067227924806787e-05, "lr": 1.9067227924806787e-05, "data_time": 0.009274005889892578, "loss": 1.734375, "time": 1.0109434127807617, "tflops": 11.801749360117343, "tokens_per_sec": 194.86748467743124, "iter": 900, "memory": 16138, "step": 900} +{"base_lr": 1.9038866711543873e-05, "lr": 1.9038866711543873e-05, "data_time": 0.008600234985351562, "loss": 1.3046875, "time": 0.9966461658477783, "tflops": 17.949478673451647, "tokens_per_sec": 295.9927104608563, "iter": 910, "memory": 16137, "step": 910} +{"base_lr": 1.9010102509590144e-05, "lr": 1.9010102509590144e-05, "data_time": 0.008765697479248047, "loss": 1.5, "time": 0.9850969314575195, "tflops": 8.846781627071762, "tokens_per_sec": 146.17850832891722, "iter": 920, "memory": 16136, "step": 920} +{"base_lr": 1.8980936601368332e-05, "lr": 1.8980936601368332e-05, "data_time": 0.008085966110229492, "loss": 1.8046875, "time": 0.9907898902893066, "tflops": 11.429032672824382, "tokens_per_sec": 188.73830045359873, "iter": 930, "memory": 16136, "step": 930} +{"base_lr": 1.8951370287210828e-05, "lr": 1.8951370287210828e-05, "data_time": 0.008547782897949219, "loss": 1.671875, "time": 1.0107839107513428, "tflops": 8.44199888386502, "tokens_per_sec": 139.4956909187953, "iter": 940, "memory": 16136, "step": 940} +{"base_lr": 1.892140488530172e-05, "lr": 1.892140488530172e-05, "data_time": 0.008234977722167969, "loss": 0.275390625, "time": 0.9876904487609863, "tflops": 3.4886249777052774, "tokens_per_sec": 57.71038899024107, "iter": 950, "memory": 16136, "step": 950} +{"base_lr": 1.8891041731618008e-05, "lr": 1.8891041731618008e-05, "data_time": 0.008466005325317383, "loss": 0.09814453125, "time": 
1.6612880229949951, "tflops": 2.984787177391186, "tokens_per_sec": 49.35929162489223, "iter": 960, "memory": 16136, "step": 960} +{"base_lr": 1.886028217987006e-05, "lr": 1.886028217987006e-05, "data_time": 0.008612871170043945, "loss": 1.3359375, "time": 1.3181753158569336, "tflops": 8.820765486345318, "tokens_per_sec": 145.65589090479736, "iter": 970, "memory": 16136, "step": 970} +{"base_lr": 1.8829127601441246e-05, "lr": 1.8829127601441246e-05, "data_time": 0.008281230926513672, "loss": 0.04931640625, "time": 1.0200905799865723, "tflops": 5.4544528999961805, "tokens_per_sec": 90.1880693782319, "iter": 980, "memory": 16136, "step": 980} +{"base_lr": 1.879757938532679e-05, "lr": 1.879757938532679e-05, "data_time": 0.008590459823608398, "loss": 1.3203125, "time": 0.9922387599945068, "tflops": 10.678301939392066, "tokens_per_sec": 176.36884090356685, "iter": 990, "memory": 16136, "step": 990} +{"base_lr": 1.876563893807188e-05, "lr": 1.876563893807188e-05, "data_time": 0.008165836334228516, "loss": 1.5, "time": 1.3549275398254395, "tflops": 7.909500345475874, "tokens_per_sec": 130.63429209112758, "iter": 1000, "memory": 16136, "step": 1000} +{"base_lr": 1.8733307683708896e-05, "lr": 1.8733307683708896e-05, "data_time": 0.00855875015258789, "loss": 1.8984375, "time": 1.2368974685668945, "tflops": 14.216947921900243, "tokens_per_sec": 234.45759035772625, "iter": 1010, "memory": 16137, "step": 1010} +{"base_lr": 1.8700587063693977e-05, "lr": 1.8700587063693977e-05, "data_time": 0.008855342864990234, "loss": 0.25, "time": 1.0061523914337158, "tflops": 4.567301532531489, "tokens_per_sec": 75.5352774062668, "iter": 1020, "memory": 16136, "step": 1020} +{"base_lr": 1.8667478536842736e-05, "lr": 1.8667478536842736e-05, "data_time": 0.008416891098022461, "loss": 1.6953125, "time": 2.9754812717437744, "tflops": 2.7862802634178805, "tokens_per_sec": 46.04297170375581, "iter": 1030, "memory": 16136, "step": 1030} diff --git 
a/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/config.py b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/config.py new file mode 100644 index 0000000000000000000000000000000000000000..cb0154352ffa1176706336c43161b9d53976334a --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/config.py @@ -0,0 +1,261 @@ +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b/iter_4603.pth' +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + 
pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-05, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] 
+per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' +save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2c_hard.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + 
type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), + num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2c' diff --git a/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/scalars.json b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/scalars.json new file mode 100644 index 0000000000000000000000000000000000000000..a9267c8bae7c4b0de9965793d712eead1ab6fe14 --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2c/20250926_014218/vis_data/scalars.json @@ -0,0 +1,103 @@ +{"base_lr": 9.243902439024397e-07, "lr": 9.243902439024397e-07, "data_time": 0.008118867874145508, "loss": 2.0625, "time": 1.0140972137451172, "tflops": 9.191799003961803, "tokens_per_sec": 151.859208281539, "iter": 10, "memory": 15745, "step": 10} +{"base_lr": 1.7292682926829274e-06, "lr": 1.7292682926829274e-06, "data_time": 0.008229255676269531, "loss": 2.046875, "time": 1.0191514492034912, "tflops": 3.7964868826631593, "tokens_per_sec": 62.79733993407538, "iter": 20, "memory": 15746, "step": 20} +{"base_lr": 2.5341463414634137e-06, "lr": 2.5341463414634137e-06, "data_time": 0.008212089538574219, "loss": 1.484375, "time": 1.0137302875518799, "tflops": 8.357653278972437, "tokens_per_sec": 138.1037951800341, "iter": 30, "memory": 15746, "step": 30} +{"base_lr": 3.339024390243901e-06, "lr": 3.339024390243901e-06, "data_time": 0.008138179779052734, "loss": 0.92578125, "time": 1.0034892559051514, "tflops": 6.268907941571752, "tokens_per_sec": 103.63837917336537, "iter": 
40, "memory": 15745, "step": 40} +{"base_lr": 4.143902439024386e-06, "lr": 4.143902439024386e-06, "data_time": 0.008208990097045898, "loss": 1.0390625, "time": 0.9895339012145996, "tflops": 7.826883824658906, "tokens_per_sec": 129.35382996252835, "iter": 50, "memory": 15745, "step": 50} +{"base_lr": 4.948780487804875e-06, "lr": 4.948780487804875e-06, "data_time": 0.008820295333862305, "loss": 2.0, "time": 1.0207452774047852, "tflops": 9.250775194217402, "tokens_per_sec": 152.82950943106255, "iter": 60, "memory": 15745, "step": 60} +{"base_lr": 5.7536585365853625e-06, "lr": 5.7536585365853625e-06, "data_time": 0.008440256118774414, "loss": 1.6953125, "time": 0.9985389709472656, "tflops": 7.756299063641921, "tokens_per_sec": 128.18728534795633, "iter": 70, "memory": 16136, "step": 70} +{"base_lr": 6.5585365853658505e-06, "lr": 6.5585365853658505e-06, "data_time": 0.008870840072631836, "loss": 0.287109375, "time": 0.9834458827972412, "tflops": 3.442168112604207, "tokens_per_sec": 56.94263505446865, "iter": 80, "memory": 16136, "step": 80} +{"base_lr": 7.363414634146338e-06, "lr": 7.363414634146338e-06, "data_time": 0.00815582275390625, "loss": 0.28515625, "time": 1.1788604259490967, "tflops": 2.9228918826543606, "tokens_per_sec": 48.35178002863327, "iter": 90, "memory": 16135, "step": 90} +{"base_lr": 8.16829268292683e-06, "lr": 8.16829268292683e-06, "data_time": 0.00822758674621582, "loss": 2.171875, "time": 0.9815342426300049, "tflops": 9.620331804755848, "tokens_per_sec": 158.9348524223074, "iter": 100, "memory": 16136, "step": 100} +{"base_lr": 8.973170731707318e-06, "lr": 8.973170731707318e-06, "data_time": 0.007964611053466797, "loss": 2.453125, "time": 0.9912879467010498, "tflops": 11.9745029830851, "tokens_per_sec": 197.72256956425142, "iter": 110, "memory": 16136, "step": 110} +{"base_lr": 9.77804878048781e-06, "lr": 9.77804878048781e-06, "data_time": 0.008516788482666016, "loss": 1.4453125, "time": 0.9811723232269287, "tflops": 6.349762056964083, 
"tokens_per_sec": 104.9764629123898, "iter": 120, "memory": 16136, "step": 120} +{"base_lr": 1.0582926829268297e-05, "lr": 1.0582926829268297e-05, "data_time": 0.008511066436767578, "loss": 0.004638671875, "time": 1.0091347694396973, "tflops": 4.973694890879441, "tokens_per_sec": 82.24867729609782, "iter": 130, "memory": 16138, "step": 130} +{"base_lr": 1.1387804878048789e-05, "lr": 1.1387804878048789e-05, "data_time": 0.008749008178710938, "loss": 1.9609375, "time": 0.9933547973632812, "tflops": 12.071842520309946, "tokens_per_sec": 199.32455203857018, "iter": 140, "memory": 16136, "step": 140} +{"base_lr": 1.2192682926829275e-05, "lr": 1.2192682926829275e-05, "data_time": 0.008867025375366211, "loss": 1.703125, "time": 0.9955644607543945, "tflops": 10.642630882143598, "tokens_per_sec": 175.7796776586591, "iter": 150, "memory": 16136, "step": 150} +{"base_lr": 1.2997560975609762e-05, "lr": 1.2997560975609762e-05, "data_time": 0.008375406265258789, "loss": 0.46484375, "time": 0.9964370727539062, "tflops": 3.5794306608153432, "tokens_per_sec": 59.210964358119824, "iter": 160, "memory": 16135, "step": 160} +{"base_lr": 1.3802439024390248e-05, "lr": 1.3802439024390248e-05, "data_time": 0.008290290832519531, "loss": 1.484375, "time": 0.9921352863311768, "tflops": 12.147885835638707, "tokens_per_sec": 200.5774844836764, "iter": 170, "memory": 16136, "step": 170} +{"base_lr": 1.4607317073170733e-05, "lr": 1.4607317073170733e-05, "data_time": 0.00788116455078125, "loss": 0.4296875, "time": 1.3137102127075195, "tflops": 3.0834235403563524, "tokens_per_sec": 51.000593092645516, "iter": 180, "memory": 16135, "step": 180} +{"base_lr": 1.5412195121951217e-05, "lr": 1.5412195121951217e-05, "data_time": 0.00821065902709961, "loss": 1.5, "time": 0.9913232326507568, "tflops": 14.241470856292839, "tokens_per_sec": 235.03938203559773, "iter": 190, "memory": 16135, "step": 190} +{"base_lr": 1.6217073170731705e-05, "lr": 1.6217073170731705e-05, "data_time": 0.008462905883789062, 
"loss": 1.640625, "time": 1.0160644054412842, "tflops": 7.503214783753246, "tokens_per_sec": 124.00788702479275, "iter": 200, "memory": 16136, "step": 200} +{"base_lr": 1.7021951219512186e-05, "lr": 1.7021951219512186e-05, "data_time": 0.008041620254516602, "loss": 0.076171875, "time": 0.9934191703796387, "tflops": 5.113317179741633, "tokens_per_sec": 84.55645160120531, "iter": 210, "memory": 16136, "step": 210} +{"base_lr": 1.7826829268292678e-05, "lr": 1.7826829268292678e-05, "data_time": 0.008035898208618164, "loss": 1.7109375, "time": 1.005375862121582, "tflops": 11.021711485674135, "tokens_per_sec": 182.02147763289688, "iter": 220, "memory": 16136, "step": 220} +{"base_lr": 1.863170731707317e-05, "lr": 1.863170731707317e-05, "data_time": 0.008314847946166992, "loss": 2.125, "time": 0.9970285892486572, "tflops": 10.200975248092162, "tokens_per_sec": 168.50068474609466, "iter": 230, "memory": 16136, "step": 230} +{"base_lr": 1.9436585365853654e-05, "lr": 1.9436585365853654e-05, "data_time": 0.007876873016357422, "loss": 7.625, "time": 1.2310943603515625, "tflops": 1.816332265701899, "tokens_per_sec": 30.054560553265702, "iter": 240, "memory": 16136, "step": 240} +{"base_lr": 1.9999991083171344e-05, "lr": 1.9999991083171344e-05, "data_time": 0.008183002471923828, "loss": 1.8359375, "time": 0.9984240531921387, "tflops": 9.518341566590484, "tokens_per_sec": 157.2478141906597, "iter": 250, "memory": 16135, "step": 250} +{"base_lr": 1.9999678995838395e-05, "lr": 1.9999678995838395e-05, "data_time": 0.008537769317626953, "loss": 1.5, "time": 0.9947342872619629, "tflops": 8.33441135767381, "tokens_per_sec": 137.72522145281536, "iter": 260, "memory": 16136, "step": 260} +{"base_lr": 1.999892108297491e-05, "lr": 1.999892108297491e-05, "data_time": 0.00838327407836914, "loss": 1.796875, "time": 1.0031108856201172, "tflops": 14.801318191743723, "tokens_per_sec": 244.24019668403676, "iter": 270, "memory": 16136, "step": 270} +{"base_lr": 1.9997717378371684e-05, "lr": 
1.9997717378371684e-05, "data_time": 0.008157968521118164, "loss": 0.05078125, "time": 1.0156772136688232, "tflops": 4.941657033774531, "tokens_per_sec": 81.71887572441068, "iter": 280, "memory": 16136, "step": 280} +{"base_lr": 1.9996067935694643e-05, "lr": 1.9996067935694643e-05, "data_time": 0.008051395416259766, "loss": 1.953125, "time": 1.3622171878814697, "tflops": 6.575672643288391, "tokens_per_sec": 108.64640478517383, "iter": 290, "memory": 16136, "step": 290} +{"base_lr": 1.9993972828482525e-05, "lr": 1.9993972828482525e-05, "data_time": 0.007958173751831055, "loss": 0.224609375, "time": 0.9970955848693848, "tflops": 5.155180564266755, "tokens_per_sec": 85.24759440294723, "iter": 300, "memory": 16137, "step": 300} +{"base_lr": 1.999143215014356e-05, "lr": 1.999143215014356e-05, "data_time": 0.008297204971313477, "loss": 1.578125, "time": 1.017378330230713, "tflops": 16.5068652694098, "tokens_per_sec": 272.2684293235456, "iter": 310, "memory": 16137, "step": 310} +{"base_lr": 1.9988446013951303e-05, "lr": 1.9988446013951303e-05, "data_time": 0.008132696151733398, "loss": 1.3359375, "time": 1.256601095199585, "tflops": 2.693922575087076, "tokens_per_sec": 44.564659551773666, "iter": 320, "memory": 16135, "step": 320} +{"base_lr": 1.998501455303961e-05, "lr": 1.998501455303961e-05, "data_time": 0.007825374603271484, "loss": 1.3828125, "time": 0.996518611907959, "tflops": 11.66793594431557, "tokens_per_sec": 192.67076169525768, "iter": 330, "memory": 16136, "step": 330} +{"base_lr": 1.9981137920396665e-05, "lr": 1.9981137920396665e-05, "data_time": 0.007911443710327148, "loss": 2.1875, "time": 1.0248513221740723, "tflops": 10.575369130646001, "tokens_per_sec": 174.65948096754462, "iter": 340, "memory": 16136, "step": 340} +{"base_lr": 1.997681628885821e-05, "lr": 1.997681628885821e-05, "data_time": 0.008276939392089844, "loss": 0.3984375, "time": 1.218977451324463, "tflops": 2.826698365950343, "tokens_per_sec": 46.76050400934052, "iter": 350, "memory": 16136, 
"step": 350} +{"base_lr": 1.9972049851099802e-05, "lr": 1.9972049851099802e-05, "data_time": 0.008832454681396484, "loss": 0.33984375, "time": 1.3008918762207031, "tflops": 2.7417170288979134, "tokens_per_sec": 45.353500224291494, "iter": 360, "memory": 16136, "step": 360} +{"base_lr": 1.9966838819628217e-05, "lr": 1.9966838819628217e-05, "data_time": 0.007926464080810547, "loss": 1.078125, "time": 0.9902834892272949, "tflops": 3.723862623614702, "tokens_per_sec": 61.59852270942727, "iter": 370, "memory": 16136, "step": 370} +{"base_lr": 1.9961183426772006e-05, "lr": 1.9961183426772006e-05, "data_time": 0.00855398178100586, "loss": 0.484375, "time": 1.0287930965423584, "tflops": 4.113819724004758, "tokens_per_sec": 68.04089202697119, "iter": 380, "memory": 16136, "step": 380} +{"base_lr": 1.995508392467111e-05, "lr": 1.995508392467111e-05, "data_time": 0.008545875549316406, "loss": 1.3984375, "time": 1.3397793769836426, "tflops": 7.500726063881778, "tokens_per_sec": 123.90099657572412, "iter": 390, "memory": 16136, "step": 390} +{"base_lr": 1.9948540585265638e-05, "lr": 1.9948540585265638e-05, "data_time": 0.007878780364990234, "loss": 1.625, "time": 0.9872312545776367, "tflops": 9.687698604171564, "tokens_per_sec": 160.04355541542947, "iter": 400, "memory": 16136, "step": 400} +{"base_lr": 1.994155370028372e-05, "lr": 1.994155370028372e-05, "data_time": 0.008478879928588867, "loss": 1.0234375, "time": 1.0021772384643555, "tflops": 3.55892877317159, "tokens_per_sec": 58.871822004606, "iter": 410, "memory": 16135, "step": 410} +{"base_lr": 1.993412358122854e-05, "lr": 1.993412358122854e-05, "data_time": 0.008288383483886719, "loss": 1.609375, "time": 0.9975581169128418, "tflops": 8.493145530178422, "tokens_per_sec": 140.342700466535, "iter": 420, "memory": 16136, "step": 420} +{"base_lr": 1.9926250559364396e-05, "lr": 1.9926250559364396e-05, "data_time": 0.007839679718017578, "loss": 1.125, "time": 0.9771850109100342, "tflops": 12.76877519540257, "tokens_per_sec": 
210.80961916100742, "iter": 430, "memory": 16135, "step": 430} +{"base_lr": 1.9917934985701986e-05, "lr": 1.9917934985701986e-05, "data_time": 0.008036613464355469, "loss": 1.0703125, "time": 1.334458827972412, "tflops": 7.621564447980587, "tokens_per_sec": 125.89373046086007, "iter": 440, "memory": 16138, "step": 440} +{"base_lr": 1.9909177230982703e-05, "lr": 1.9909177230982703e-05, "data_time": 0.007050991058349609, "loss": 1.828125, "time": 0.9914636611938477, "tflops": 9.3404598310901, "tokens_per_sec": 154.31730479724726, "iter": 450, "memory": 16136, "step": 450} +{"base_lr": 1.989997768566215e-05, "lr": 1.989997768566215e-05, "data_time": 0.008552312850952148, "loss": 0.08544921875, "time": 1.0815768241882324, "tflops": 4.640565824566757, "tokens_per_sec": 76.73981001046148, "iter": 460, "memory": 16136, "step": 460} +{"base_lr": 1.9890336759892712e-05, "lr": 1.9890336759892712e-05, "data_time": 0.01013946533203125, "loss": 2.078125, "time": 1.0961353778839111, "tflops": 11.216914024890583, "tokens_per_sec": 185.19610268551517, "iter": 470, "memory": 16136, "step": 470} +{"base_lr": 1.9880254883505262e-05, "lr": 1.9880254883505262e-05, "data_time": 0.008694887161254883, "loss": 1.796875, "time": 0.9876737594604492, "tflops": 8.148478634649937, "tokens_per_sec": 134.65984969826593, "iter": 480, "memory": 16136, "step": 480} +{"base_lr": 1.9869732505990023e-05, "lr": 1.9869732505990023e-05, "data_time": 0.008504390716552734, "loss": 1.40625, "time": 0.9924230575561523, "tflops": 6.521920544776598, "tokens_per_sec": 107.81692261703424, "iter": 490, "memory": 16137, "step": 490} +{"base_lr": 1.9858770096476526e-05, "lr": 1.9858770096476526e-05, "data_time": 0.008666276931762695, "loss": 2.125, "time": 0.992159366607666, "tflops": 9.517306805373783, "tokens_per_sec": 157.2328047793662, "iter": 500, "memory": 16136, "step": 500} +{"base_lr": 1.9847368143712633e-05, "lr": 1.9847368143712633e-05, "data_time": 0.008151054382324219, "loss": 0.408203125, "time": 
1.270345687866211, "tflops": 2.807643182368817, "tokens_per_sec": 46.444051066962224, "iter": 510, "memory": 16136, "step": 510} +{"base_lr": 1.983552715604285e-05, "lr": 1.983552715604285e-05, "data_time": 0.008281230926513672, "loss": 0.02685546875, "time": 1.003929615020752, "tflops": 5.48194239043417, "tokens_per_sec": 90.64380474325216, "iter": 520, "memory": 16136, "step": 520} +{"base_lr": 1.982324766138556e-05, "lr": 1.982324766138556e-05, "data_time": 0.008232831954956055, "loss": 1.9765625, "time": 1.0655851364135742, "tflops": 7.211396923016332, "tokens_per_sec": 119.18334411769577, "iter": 530, "memory": 16136, "step": 530} +{"base_lr": 1.9810530207209583e-05, "lr": 1.9810530207209583e-05, "data_time": 0.008224725723266602, "loss": 0.33203125, "time": 1.0057597160339355, "tflops": 3.3658000059751982, "tokens_per_sec": 55.6793030255497, "iter": 540, "memory": 16136, "step": 540} +{"base_lr": 1.9797375360509683e-05, "lr": 1.9797375360509683e-05, "data_time": 0.008211851119995117, "loss": 1.9921875, "time": 0.9995062351226807, "tflops": 12.665923602793203, "tokens_per_sec": 209.10324783930736, "iter": 550, "memory": 16136, "step": 550} +{"base_lr": 1.9783783707781336e-05, "lr": 1.9783783707781336e-05, "data_time": 0.008047342300415039, "loss": 0.33203125, "time": 0.9975039958953857, "tflops": 3.636253842753295, "tokens_per_sec": 60.1501349837524, "iter": 560, "memory": 16136, "step": 560} +{"base_lr": 1.9769755854994595e-05, "lr": 1.9769755854994595e-05, "data_time": 0.00828409194946289, "loss": 1.390625, "time": 0.9887645244598389, "tflops": 15.139057467170348, "tokens_per_sec": 249.80669703404465, "iter": 570, "memory": 16136, "step": 570} +{"base_lr": 1.9755292427566994e-05, "lr": 1.9755292427566994e-05, "data_time": 0.008766412734985352, "loss": 1.671875, "time": 1.016294240951538, "tflops": 15.806099594448245, "tokens_per_sec": 260.7512562027553, "iter": 580, "memory": 16136, "step": 580} +{"base_lr": 1.9740394070335768e-05, "lr": 
1.9740394070335768e-05, "data_time": 0.008598566055297852, "loss": 0.236328125, "time": 1.2463364601135254, "tflops": 1.5515677607748921, "tokens_per_sec": 25.67524984149106, "iter": 590, "memory": 16136, "step": 590} +{"base_lr": 1.9725061447529046e-05, "lr": 1.9725061447529046e-05, "data_time": 0.00830531120300293, "loss": 0.419921875, "time": 1.0020349025726318, "tflops": 3.317940466725104, "tokens_per_sec": 54.888307641517976, "iter": 600, "memory": 16136, "step": 600} +{"base_lr": 1.9709295242736235e-05, "lr": 1.9709295242736235e-05, "data_time": 0.009053468704223633, "loss": 1.609375, "time": 1.0155441761016846, "tflops": 8.521837292975563, "tokens_per_sec": 140.81120581951015, "iter": 610, "memory": 16135, "step": 610} +{"base_lr": 1.969309615887756e-05, "lr": 1.969309615887756e-05, "data_time": 0.008394002914428711, "loss": 1.5546875, "time": 0.9778509140014648, "tflops": 8.664312872686688, "tokens_per_sec": 143.17110921026057, "iter": 620, "memory": 16136, "step": 620} +{"base_lr": 1.967646491817272e-05, "lr": 1.967646491817272e-05, "data_time": 0.007830381393432617, "loss": 1.6796875, "time": 1.3388714790344238, "tflops": 6.690331698625274, "tokens_per_sec": 110.54085647311351, "iter": 630, "memory": 16136, "step": 630} +{"base_lr": 1.9659402262108696e-05, "lr": 1.9659402262108696e-05, "data_time": 0.008354663848876953, "loss": 1.3359375, "time": 1.2411527633666992, "tflops": 10.395693682674832, "tokens_per_sec": 171.6146523511364, "iter": 640, "memory": 16136, "step": 640} +{"base_lr": 1.964190895140667e-05, "lr": 1.964190895140667e-05, "data_time": 0.008706331253051758, "loss": 1.703125, "time": 1.014526605606079, "tflops": 10.02503424194579, "tokens_per_sec": 165.5944743799706, "iter": 650, "memory": 16136, "step": 650} +{"base_lr": 1.9623985765988126e-05, "lr": 1.9623985765988126e-05, "data_time": 0.008224010467529297, "loss": 1.59375, "time": 1.234361171722412, "tflops": 6.618223750212486, "tokens_per_sec": 109.36831382302258, "iter": 660, "memory": 
16136, "step": 660} +{"base_lr": 1.960563350494008e-05, "lr": 1.960563350494008e-05, "data_time": 0.008541107177734375, "loss": 1.203125, "time": 1.2946748733520508, "tflops": 6.825043751096583, "tokens_per_sec": 112.76962502707549, "iter": 670, "memory": 16136, "step": 670} +{"base_lr": 1.958685298647946e-05, "lr": 1.958685298647946e-05, "data_time": 0.00795435905456543, "loss": 1.453125, "time": 1.1922430992126465, "tflops": 7.004570053610767, "tokens_per_sec": 115.74820612593103, "iter": 680, "memory": 16135, "step": 680} +{"base_lr": 1.95676450479166e-05, "lr": 1.95676450479166e-05, "data_time": 0.007901191711425781, "loss": 0.004730224609375, "time": 0.9766857624053955, "tflops": 5.448875192225441, "tokens_per_sec": 90.10062743536085, "iter": 690, "memory": 16135, "step": 690} +{"base_lr": 1.9548010545617934e-05, "lr": 1.9548010545617934e-05, "data_time": 0.008874177932739258, "loss": 0.228515625, "time": 0.9875307083129883, "tflops": 3.7955105400532565, "tokens_per_sec": 62.782857766370256, "iter": 700, "memory": 16136, "step": 700} +{"base_lr": 1.9527950354967795e-05, "lr": 1.9527950354967795e-05, "data_time": 0.008386850357055664, "loss": 1.3203125, "time": 0.9887571334838867, "tflops": 7.587847631530314, "tokens_per_sec": 125.40996752454313, "iter": 710, "memory": 16136, "step": 710} +{"base_lr": 1.9507465370329408e-05, "lr": 1.9507465370329408e-05, "data_time": 0.007892131805419922, "loss": 0.0849609375, "time": 1.3349566459655762, "tflops": 3.487700516432031, "tokens_per_sec": 57.6797757684843, "iter": 720, "memory": 16136, "step": 720} +{"base_lr": 1.9486556505004994e-05, "lr": 1.9486556505004994e-05, "data_time": 0.00815725326538086, "loss": 1.9375, "time": 1.0468125343322754, "tflops": 6.877582039937506, "tokens_per_sec": 113.67842483448308, "iter": 730, "memory": 16136, "step": 730} +{"base_lr": 1.946522469119509e-05, "lr": 1.946522469119509e-05, "data_time": 0.00836944580078125, "loss": 2.09375, "time": 0.9808824062347412, "tflops": 
12.225341798713009, "tokens_per_sec": 201.85905949710093, "iter": 740, "memory": 16136, "step": 740} +{"base_lr": 1.9443470879956948e-05, "lr": 1.9443470879956948e-05, "data_time": 0.008220911026000977, "loss": 0.4453125, "time": 0.9958882331848145, "tflops": 3.8851700215139298, "tokens_per_sec": 64.2642395675929, "iter": 750, "memory": 16136, "step": 750} +{"base_lr": 1.942129604116213e-05, "lr": 1.942129604116213e-05, "data_time": 0.008209943771362305, "loss": 1.203125, "time": 0.9805278778076172, "tflops": 8.022403955204373, "tokens_per_sec": 132.58164601147, "iter": 760, "memory": 16136, "step": 760} +{"base_lr": 1.9398701163453314e-05, "lr": 1.9398701163453314e-05, "data_time": 0.008694887161254883, "loss": 1.4296875, "time": 0.9941167831420898, "tflops": 11.635054608604412, "tokens_per_sec": 192.1303444813768, "iter": 770, "memory": 16136, "step": 770} +{"base_lr": 1.9375687254200167e-05, "lr": 1.9375687254200167e-05, "data_time": 0.008378744125366211, "loss": 0.3125, "time": 1.015552282333374, "tflops": 3.5120569091662572, "tokens_per_sec": 58.096467337339945, "iter": 780, "memory": 16136, "step": 780} +{"base_lr": 1.9352255339454455e-05, "lr": 1.9352255339454455e-05, "data_time": 0.00817108154296875, "loss": 1.4609375, "time": 1.0136175155639648, "tflops": 9.555200927286311, "tokens_per_sec": 157.85046878439152, "iter": 790, "memory": 16136, "step": 790} +{"base_lr": 1.932840646390429e-05, "lr": 1.932840646390429e-05, "data_time": 0.008602142333984375, "loss": 1.796875, "time": 1.2164642810821533, "tflops": 7.014621620292943, "tokens_per_sec": 115.90969187722636, "iter": 800, "memory": 16135, "step": 800} +{"base_lr": 1.9304141690827576e-05, "lr": 1.9304141690827576e-05, "data_time": 0.008701324462890625, "loss": 0.0111083984375, "time": 0.9806404113769531, "tflops": 5.241684638120516, "tokens_per_sec": 86.67805141801337, "iter": 810, "memory": 16135, "step": 810} +{"base_lr": 1.9279462102044556e-05, "lr": 1.9279462102044556e-05, "data_time": 
0.008332252502441406, "loss": 2.09375, "time": 1.3954112529754639, "tflops": 6.853892590141893, "tokens_per_sec": 113.22826848570996, "iter": 820, "memory": 16136, "step": 820} +{"base_lr": 1.9254368797869636e-05, "lr": 1.9254368797869636e-05, "data_time": 0.008191585540771484, "loss": 0.10595703125, "time": 1.004793405532837, "tflops": 3.6098741475247995, "tokens_per_sec": 59.71376769548222, "iter": 830, "memory": 16135, "step": 830} +{"base_lr": 1.9228862897062295e-05, "lr": 1.9228862897062295e-05, "data_time": 0.008447408676147461, "loss": 0.041259765625, "time": 1.009345531463623, "tflops": 5.272563909460897, "tokens_per_sec": 87.18520789635491, "iter": 840, "memory": 16135, "step": 840} +{"base_lr": 1.92029455367772e-05, "lr": 1.92029455367772e-05, "data_time": 0.008902549743652344, "loss": 1.453125, "time": 1.3406057357788086, "tflops": 7.315085534172829, "tokens_per_sec": 120.84089727228208, "iter": 850, "memory": 16136, "step": 850} +{"base_lr": 1.917661787251355e-05, "lr": 1.917661787251355e-05, "data_time": 0.008685588836669922, "loss": 1.8203125, "time": 0.9942400455474854, "tflops": 7.972727601277693, "tokens_per_sec": 131.758925408936, "iter": 860, "memory": 16136, "step": 860} +{"base_lr": 1.91498810780635e-05, "lr": 1.91498810780635e-05, "data_time": 0.009018659591674805, "loss": 0.234375, "time": 1.0167601108551025, "tflops": 4.817316773487389, "tokens_per_sec": 79.66480897032707, "iter": 870, "memory": 16137, "step": 870} +{"base_lr": 1.9122736345459878e-05, "lr": 1.9122736345459878e-05, "data_time": 0.008199453353881836, "loss": 0.23828125, "time": 0.9912996292114258, "tflops": 3.9031539953525693, "tokens_per_sec": 64.56171082283885, "iter": 880, "memory": 16136, "step": 880} +{"base_lr": 1.9095184884923014e-05, "lr": 1.9095184884923014e-05, "data_time": 0.008847951889038086, "loss": 0.421875, "time": 0.9784226417541504, "tflops": 3.9545232737165783, "tokens_per_sec": 65.41140532601857, "iter": 890, "memory": 16136, "step": 890} +{"base_lr": 
1.9067227924806787e-05, "lr": 1.9067227924806787e-05, "data_time": 0.009274005889892578, "loss": 1.734375, "time": 1.0109434127807617, "tflops": 11.801749360117343, "tokens_per_sec": 194.86748467743124, "iter": 900, "memory": 16138, "step": 900} +{"base_lr": 1.9038866711543873e-05, "lr": 1.9038866711543873e-05, "data_time": 0.008600234985351562, "loss": 1.3046875, "time": 0.9966461658477783, "tflops": 17.949478673451647, "tokens_per_sec": 295.9927104608563, "iter": 910, "memory": 16137, "step": 910} +{"base_lr": 1.9010102509590144e-05, "lr": 1.9010102509590144e-05, "data_time": 0.008765697479248047, "loss": 1.5, "time": 0.9850969314575195, "tflops": 8.846781627071762, "tokens_per_sec": 146.17850832891722, "iter": 920, "memory": 16136, "step": 920} +{"base_lr": 1.8980936601368332e-05, "lr": 1.8980936601368332e-05, "data_time": 0.008085966110229492, "loss": 1.8046875, "time": 0.9907898902893066, "tflops": 11.429032672824382, "tokens_per_sec": 188.73830045359873, "iter": 930, "memory": 16136, "step": 930} +{"base_lr": 1.8951370287210828e-05, "lr": 1.8951370287210828e-05, "data_time": 0.008547782897949219, "loss": 1.671875, "time": 1.0107839107513428, "tflops": 8.44199888386502, "tokens_per_sec": 139.4956909187953, "iter": 940, "memory": 16136, "step": 940} +{"base_lr": 1.892140488530172e-05, "lr": 1.892140488530172e-05, "data_time": 0.008234977722167969, "loss": 0.275390625, "time": 0.9876904487609863, "tflops": 3.4886249777052774, "tokens_per_sec": 57.71038899024107, "iter": 950, "memory": 16136, "step": 950} +{"base_lr": 1.8891041731618008e-05, "lr": 1.8891041731618008e-05, "data_time": 0.008466005325317383, "loss": 0.09814453125, "time": 1.6612880229949951, "tflops": 2.984787177391186, "tokens_per_sec": 49.35929162489223, "iter": 960, "memory": 16136, "step": 960} +{"base_lr": 1.886028217987006e-05, "lr": 1.886028217987006e-05, "data_time": 0.008612871170043945, "loss": 1.3359375, "time": 1.3181753158569336, "tflops": 8.820765486345318, "tokens_per_sec": 
145.65589090479736, "iter": 970, "memory": 16136, "step": 970} +{"base_lr": 1.8829127601441246e-05, "lr": 1.8829127601441246e-05, "data_time": 0.008281230926513672, "loss": 0.04931640625, "time": 1.0200905799865723, "tflops": 5.4544528999961805, "tokens_per_sec": 90.1880693782319, "iter": 980, "memory": 16136, "step": 980} +{"base_lr": 1.879757938532679e-05, "lr": 1.879757938532679e-05, "data_time": 0.008590459823608398, "loss": 1.3203125, "time": 0.9922387599945068, "tflops": 10.678301939392066, "tokens_per_sec": 176.36884090356685, "iter": 990, "memory": 16136, "step": 990} +{"base_lr": 1.876563893807188e-05, "lr": 1.876563893807188e-05, "data_time": 0.008165836334228516, "loss": 1.5, "time": 1.3549275398254395, "tflops": 7.909500345475874, "tokens_per_sec": 130.63429209112758, "iter": 1000, "memory": 16136, "step": 1000} +{"base_lr": 1.8733307683708896e-05, "lr": 1.8733307683708896e-05, "data_time": 0.00855875015258789, "loss": 1.8984375, "time": 1.2368974685668945, "tflops": 14.216947921900243, "tokens_per_sec": 234.45759035772625, "iter": 1010, "memory": 16137, "step": 1010} +{"base_lr": 1.8700587063693977e-05, "lr": 1.8700587063693977e-05, "data_time": 0.008855342864990234, "loss": 0.25, "time": 1.0061523914337158, "tflops": 4.567301532531489, "tokens_per_sec": 75.5352774062668, "iter": 1020, "memory": 16136, "step": 1020} +{"base_lr": 1.8667478536842736e-05, "lr": 1.8667478536842736e-05, "data_time": 0.008416891098022461, "loss": 1.6953125, "time": 2.9754812717437744, "tflops": 2.7862802634178805, "tokens_per_sec": 46.04297170375581, "iter": 1030, "memory": 16136, "step": 1030} diff --git a/stage_2/multi_stage2_run_stage1_both/stage2c/last_checkpoint b/stage_2/multi_stage2_run_stage1_both/stage2c/last_checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/stage_2/multi_stage2_run_stage1_both/stage2c/temp_config_stage_2c.py 
b/stage_2/multi_stage2_run_stage1_both/stage2c/temp_config_stage_2c.py new file mode 100644 index 0000000000000000000000000000000000000000..cb0154352ffa1176706336c43161b9d53976334a --- /dev/null +++ b/stage_2/multi_stage2_run_stage1_both/stage2c/temp_config_stage_2c.py @@ -0,0 +1,261 @@ +SYSTEM = '' +accumulative_counts = 64 +batch_size = 1 +betas = ( + 0.9, + 0.999, +) +bnb = dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig') +custom_hooks = [ + dict( + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.DatasetInfoHook'), + dict( + evaluation_images=[ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', + ], + evaluation_inputs=[ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the tumor? 
A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', + ], + every_n_iters=512, + prompt_template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + system='', + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.engine.hooks.EvaluateChatHookResampler'), + dict(type='xtuner.engine.hooks.ThroughputHook'), +] +data_path = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json' +dataloader_num_workers = 10 +default_hooks = dict( + checkpoint=dict( + by_epoch=False, + interval=4096, + max_keep_ckpts=8, + type='mmengine.hooks.CheckpointHook'), + logger=dict( + interval=10, + log_metric_by_epoch=False, + type='mmengine.hooks.LoggerHook'), + param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'), + sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'), + timer=dict(type='mmengine.hooks.IterTimerHook')) +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +evaluation_freq = 512 +evaluation_images = [ + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EB-A5UN-06Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/skcm_224x224_b20_t15/h5_files/TCGA-EE-A3AG-01Z-00-DX1.h5', + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression/lusc_224x224_b20_t15/h5_files/TCGA-NC-A5HP-01Z-00-DX1.h5', +] +evaluation_inputs = [ + 'Are the tumor cells organized in a lobulated pattern within the slide?', + 'Craft a comprehensive outline capturing the key findings of the pathology report based on the whole slide image.', + 'Based on the observed features, what do you think is the correct histological classification of the 
tumor? A) Poorly differentiated keratinizing squamous cell carcinoma B) Moderately differentiated squamous cell carcinoma C) Well-differentiated squamous cell carcinoma D) Adenocarcinoma', +] +image_path_list = None +launcher = 'pytorch' +llava_dataset = dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/stage2_tasks_plus_report.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix='/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' +) +llm_lora = dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig') +llm_name_or_path = 'Qwen/Qwen2.5-7B-Instruct' +load_from = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2b/iter_4603.pth' +log_level = 'INFO' +log_processor = dict( + by_epoch=False, + mean_pattern='.*(loss|time|data_time|grad_norm|tflops).*', + window_size=1) +lr = 5e-06 +max_epochs = 2 +max_length = 15836 +max_norm = 1 +model = dict( + enable_token_merge=True, + freeze_llm=True, + freeze_mm_in_stage2=False, + llm=dict( + attn_implementation='flash_attention_2', + 
pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + quantization_config=dict( + bnb_4bit_compute_dtype='torch.bfloat16', + bnb_4bit_quant_type='nf4', + bnb_4bit_use_double_quant=True, + llm_int8_has_fp16_weight=False, + llm_int8_threshold=6.0, + load_in_4bit=True, + load_in_8bit=False, + type='transformers.BitsAndBytesConfig'), + torch_dtype='torch.bfloat16', + trust_remote_code=True, + type='transformers.AutoModelForCausalLM.from_pretrained'), + llm_lora=dict( + bias='none', + lora_alpha=256, + lora_dropout=0.05, + r=128, + task_type='CAUSAL_LM', + type='peft.LoraConfig'), + max_position_embeddings=None, + projector_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/projector/projector.safetensors', + resampler_num_latents=100, + resampler_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/resampler/resampler.safetensors', + token_merge_pth= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/checkpoints/stage_1/token_merge_plus_resampler/stage1_qwen25_both_hf/token_merger/merger.safetensors', + train_stage='2', + type='xtuner.model.llava_no_longnet_simple_sampler.LLaVAModel', + use_resampler=True) +optim_type = 'torch.optim.AdamW' +optim_wrapper = dict( + optimizer=dict( + betas=( + 0.9, + 0.999, + ), + lr=2e-05, + type='torch.optim.AdamW', + weight_decay=0.01), + paramwise_cfg=dict( + bias_decay_mult=0.0, + norm_decay_mult=0.0, + paramwise_cfg=dict( + custom_keys=dict({'^projector\.': dict(lr_mult=1.0)}))), + type='DeepSpeedOptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=True, + convert_to_iter_based=True, + end=0.1, + start_factor=0.01, + type='mmengine.optim.LinearLR'), + dict( + begin=0.1, + by_epoch=True, + convert_to_iter_based=True, + end=2, + eta_min=0.0, + type='mmengine.optim.CosineAnnealingLR'), +] 
+per_image_length = 10240 +prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.qwen_chat' +randomness = dict(deterministic=False, seed=None) +resume = False +runner_type = 'FlexibleRunner' +sample_type = 'wsi' +save_steps = 4096 +save_total_limit = 8 +seed = 42 +strategy = dict( + config=dict( + bf16=dict(enabled=True), + fp16=dict(enabled=False, initial_scale_power=16), + gradient_accumulation_steps='auto', + gradient_clipping='auto', + train_micro_batch_size_per_gpu='auto', + zero_allow_untested_optimizer=True, + zero_force_ds_cpu_optimizer=False, + zero_optimization=dict(overlap_comm=False, stage=2)), + exclude_frozen_parameters=True, + gradient_accumulation_steps=64, + gradient_clipping=1, + sequence_parallel_size=1, + train_micro_batch_size_per_gpu=1, + type='xtuner.engine.DeepSpeedStrategy') +tokenizer = dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + type='transformers.AutoTokenizer.from_pretrained') +train_cfg = dict(max_epochs=1, type='xtuner.engine.runner.TrainLoop') +train_dataloader = dict( + batch_size=1, + collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'), + dataset=dict( + data_path= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/merged_dataset_curriculum/stage2c_hard.json', + dataset_map_fn='xtuner.dataset.map_fns.llava_map_fn', + identifier='_224x224_b20_t15', + image_feature_prefix= + '/mnt/bn/xudong-va/meilong/datasets/Token_Compression', + image_feature_suffix='.h5', + image_folder='', + image_path_list=None, + max_length=15836, + pad_image_to_square=False, + per_image_length=10240, + sample_num=10240, + sample_strategy='linspace', + template_map_fn=dict( + template='xtuner.utils.PROMPT_TEMPLATE.qwen_chat', + type='xtuner.dataset.map_fns.template_map_fn_factory'), + tokenizer=dict( + padding_side='right', + pretrained_model_name_or_path='Qwen/Qwen2.5-7B-Instruct', + trust_remote_code=True, + 
type='transformers.AutoTokenizer.from_pretrained'), + type='xtuner.dataset.LLaVADataset', + unwanted_prefix_csv= + '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/merged_dataset/missing_slides3.csv' + ), + num_workers=10, + persistent_workers=True, + pin_memory=True, + prefetch_factor=4, + sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler')) +visualizer = None +warmup_ratio = 0.05 +weight_decay = 0.01 +work_dir = '/mnt/bn/yuxuanwang/meilong/code/projects/efficient_foundation_wsi_llava/curriculum_training/models/outputs/multi_stage2_run_stage1_both/stage2c'