{ "best_global_step": 76, "best_metric": 1.4253603219985962, "best_model_checkpoint": "autodl-tmp/fine-tuned-lora/checkpoint-76", "epoch": 4.0, "eval_steps": 500, "global_step": 76, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05263157894736842, "grad_norm": 0.991044282913208, "learning_rate": 0.0, "loss": 2.1624, "step": 1 }, { "epoch": 0.10526315789473684, "grad_norm": 1.0417003631591797, "learning_rate": 2.0000000000000003e-06, "loss": 2.1768, "step": 2 }, { "epoch": 0.15789473684210525, "grad_norm": 1.072008490562439, "learning_rate": 4.000000000000001e-06, "loss": 2.2301, "step": 3 }, { "epoch": 0.21052631578947367, "grad_norm": 0.9654397368431091, "learning_rate": 6e-06, "loss": 2.1145, "step": 4 }, { "epoch": 0.2631578947368421, "grad_norm": 1.0050045251846313, "learning_rate": 8.000000000000001e-06, "loss": 2.0663, "step": 5 }, { "epoch": 0.3157894736842105, "grad_norm": 1.098430871963501, "learning_rate": 1e-05, "loss": 2.268, "step": 6 }, { "epoch": 0.3684210526315789, "grad_norm": 0.984906017780304, "learning_rate": 1.2e-05, "loss": 2.1513, "step": 7 }, { "epoch": 0.42105263157894735, "grad_norm": 0.9754630923271179, "learning_rate": 1.4e-05, "loss": 2.0979, "step": 8 }, { "epoch": 0.47368421052631576, "grad_norm": 0.9271889328956604, "learning_rate": 1.6000000000000003e-05, "loss": 2.0335, "step": 9 }, { "epoch": 0.5263157894736842, "grad_norm": 0.8084726929664612, "learning_rate": 1.8e-05, "loss": 2.0471, "step": 10 }, { "epoch": 0.5789473684210527, "grad_norm": 0.7331921458244324, "learning_rate": 2e-05, "loss": 1.9383, "step": 11 }, { "epoch": 0.631578947368421, "grad_norm": 0.8261972665786743, "learning_rate": 1.9764705882352945e-05, "loss": 2.1136, "step": 12 }, { "epoch": 0.6842105263157895, "grad_norm": 0.7031018137931824, "learning_rate": 1.9529411764705885e-05, "loss": 1.9643, "step": 13 }, { "epoch": 0.7368421052631579, "grad_norm": 0.609250009059906, "learning_rate": 1.9294117647058825e-05, "loss": 1.7862, "step": 14 }, { "epoch": 0.7894736842105263, "grad_norm": 0.682807981967926, "learning_rate": 1.9058823529411764e-05, "loss": 1.975, "step": 15 }, { "epoch": 0.8421052631578947, "grad_norm": 0.5955848693847656, "learning_rate": 1.8823529411764708e-05, "loss": 1.8278, "step": 16 }, { "epoch": 0.8947368421052632, "grad_norm": 0.5948333144187927, "learning_rate": 1.8588235294117647e-05, "loss": 1.8618, "step": 17 }, { "epoch": 0.9473684210526315, "grad_norm": 0.561774492263794, "learning_rate": 1.8352941176470587e-05, "loss": 1.8627, "step": 18 }, { "epoch": 1.0, "grad_norm": 0.504706621170044, "learning_rate": 1.811764705882353e-05, "loss": 1.7751, "step": 19 }, { "epoch": 1.0, "eval_loss": 1.8960705995559692, "eval_runtime": 30.4669, "eval_samples_per_second": 8.632, "eval_steps_per_second": 0.098, "step": 19 }, { "epoch": 1.0526315789473684, "grad_norm": 0.5365819931030273, "learning_rate": 1.7882352941176474e-05, "loss": 1.9505, "step": 20 }, { "epoch": 1.1052631578947367, "grad_norm": 0.46491557359695435, "learning_rate": 1.7647058823529414e-05, "loss": 1.6709, "step": 21 }, { "epoch": 1.1578947368421053, "grad_norm": 0.4849880337715149, "learning_rate": 1.7411764705882353e-05, "loss": 1.755, "step": 22 }, { "epoch": 1.2105263157894737, "grad_norm": 0.42490944266319275, "learning_rate": 1.7176470588235293e-05, "loss": 1.6702, "step": 23 }, { "epoch": 1.263157894736842, "grad_norm": 0.4460708498954773, "learning_rate": 1.6941176470588237e-05, "loss": 1.7768, "step": 24 }, { "epoch": 
1.3157894736842106, "grad_norm": 0.38298019766807556, "learning_rate": 1.670588235294118e-05, "loss": 1.6531, "step": 25 }, { "epoch": 1.368421052631579, "grad_norm": 0.414841890335083, "learning_rate": 1.647058823529412e-05, "loss": 1.6762, "step": 26 }, { "epoch": 1.4210526315789473, "grad_norm": 0.36949920654296875, "learning_rate": 1.623529411764706e-05, "loss": 1.5959, "step": 27 }, { "epoch": 1.4736842105263157, "grad_norm": 0.34017208218574524, "learning_rate": 1.6000000000000003e-05, "loss": 1.5484, "step": 28 }, { "epoch": 1.526315789473684, "grad_norm": 0.3720543682575226, "learning_rate": 1.5764705882352943e-05, "loss": 1.6612, "step": 29 }, { "epoch": 1.5789473684210527, "grad_norm": 0.3782600164413452, "learning_rate": 1.5529411764705882e-05, "loss": 1.5711, "step": 30 }, { "epoch": 1.631578947368421, "grad_norm": 0.3374978303909302, "learning_rate": 1.5294117647058822e-05, "loss": 1.4678, "step": 31 }, { "epoch": 1.6842105263157894, "grad_norm": 0.6604728698730469, "learning_rate": 1.5058823529411765e-05, "loss": 1.6217, "step": 32 }, { "epoch": 1.736842105263158, "grad_norm": 0.3361934721469879, "learning_rate": 1.4823529411764707e-05, "loss": 1.4735, "step": 33 }, { "epoch": 1.7894736842105263, "grad_norm": 0.419023334980011, "learning_rate": 1.4588235294117647e-05, "loss": 1.6204, "step": 34 }, { "epoch": 1.8421052631578947, "grad_norm": 0.49251654744148254, "learning_rate": 1.435294117647059e-05, "loss": 1.8546, "step": 35 }, { "epoch": 1.8947368421052633, "grad_norm": 0.4037458598613739, "learning_rate": 1.4117647058823532e-05, "loss": 1.585, "step": 36 }, { "epoch": 1.9473684210526314, "grad_norm": 0.373178094625473, "learning_rate": 1.3882352941176471e-05, "loss": 1.5054, "step": 37 }, { "epoch": 2.0, "grad_norm": 0.43062394857406616, "learning_rate": 1.3647058823529413e-05, "loss": 1.6575, "step": 38 }, { "epoch": 2.0, "eval_loss": 1.624245285987854, "eval_runtime": 29.8113, "eval_samples_per_second": 8.822, "eval_steps_per_second": 0.101, "step": 38 }, { "epoch": 2.0526315789473686, "grad_norm": 0.3876766264438629, "learning_rate": 1.3411764705882353e-05, "loss": 1.5434, "step": 39 }, { "epoch": 2.1052631578947367, "grad_norm": 0.36470478773117065, "learning_rate": 1.3176470588235294e-05, "loss": 1.5223, "step": 40 }, { "epoch": 2.1578947368421053, "grad_norm": 0.4518483579158783, "learning_rate": 1.2941176470588238e-05, "loss": 1.4864, "step": 41 }, { "epoch": 2.2105263157894735, "grad_norm": 0.39963680505752563, "learning_rate": 1.2705882352941177e-05, "loss": 1.5323, "step": 42 }, { "epoch": 2.263157894736842, "grad_norm": 0.34100541472435, "learning_rate": 1.2470588235294119e-05, "loss": 1.4836, "step": 43 }, { "epoch": 2.3157894736842106, "grad_norm": 0.371075302362442, "learning_rate": 1.223529411764706e-05, "loss": 1.4348, "step": 44 }, { "epoch": 2.3684210526315788, "grad_norm": 0.3133174180984497, "learning_rate": 1.2e-05, "loss": 1.4263, "step": 45 }, { "epoch": 2.4210526315789473, "grad_norm": 0.36948636174201965, "learning_rate": 1.1764705882352942e-05, "loss": 1.4778, "step": 46 }, { "epoch": 2.473684210526316, "grad_norm": 0.3147697150707245, "learning_rate": 1.1529411764705882e-05, "loss": 1.4395, "step": 47 }, { "epoch": 2.526315789473684, "grad_norm": 0.3104805648326874, "learning_rate": 1.1294117647058825e-05, "loss": 1.4344, "step": 48 }, { "epoch": 2.5789473684210527, "grad_norm": 0.2887001037597656, "learning_rate": 1.1058823529411766e-05, "loss": 1.456, "step": 49 }, { "epoch": 2.6315789473684212, "grad_norm": 0.27948662638664246, 
"learning_rate": 1.0823529411764706e-05, "loss": 1.4507, "step": 50 }, { "epoch": 2.6842105263157894, "grad_norm": 0.29038968682289124, "learning_rate": 1.0588235294117648e-05, "loss": 1.4486, "step": 51 }, { "epoch": 2.736842105263158, "grad_norm": 0.28142035007476807, "learning_rate": 1.035294117647059e-05, "loss": 1.5234, "step": 52 }, { "epoch": 2.7894736842105265, "grad_norm": 0.2857128083705902, "learning_rate": 1.011764705882353e-05, "loss": 1.4186, "step": 53 }, { "epoch": 2.8421052631578947, "grad_norm": 0.27921512722969055, "learning_rate": 9.882352941176472e-06, "loss": 1.4161, "step": 54 }, { "epoch": 2.8947368421052633, "grad_norm": 0.2870796024799347, "learning_rate": 9.647058823529412e-06, "loss": 1.4665, "step": 55 }, { "epoch": 2.9473684210526314, "grad_norm": 0.2911064326763153, "learning_rate": 9.411764705882354e-06, "loss": 1.3341, "step": 56 }, { "epoch": 3.0, "grad_norm": 0.29733651876449585, "learning_rate": 9.176470588235294e-06, "loss": 1.3136, "step": 57 }, { "epoch": 3.0, "eval_loss": 1.4926354885101318, "eval_runtime": 29.9258, "eval_samples_per_second": 8.788, "eval_steps_per_second": 0.1, "step": 57 }, { "epoch": 3.0526315789473686, "grad_norm": 0.2842563986778259, "learning_rate": 8.941176470588237e-06, "loss": 1.3403, "step": 58 }, { "epoch": 3.1052631578947367, "grad_norm": 0.2729772627353668, "learning_rate": 8.705882352941177e-06, "loss": 1.4465, "step": 59 }, { "epoch": 3.1578947368421053, "grad_norm": 0.27147042751312256, "learning_rate": 8.470588235294118e-06, "loss": 1.4449, "step": 60 }, { "epoch": 3.2105263157894735, "grad_norm": 0.2780725657939911, "learning_rate": 8.23529411764706e-06, "loss": 1.4588, "step": 61 }, { "epoch": 3.263157894736842, "grad_norm": 0.279209703207016, "learning_rate": 8.000000000000001e-06, "loss": 1.344, "step": 62 }, { "epoch": 3.3157894736842106, "grad_norm": 0.42355313897132874, "learning_rate": 7.764705882352941e-06, "loss": 1.4197, "step": 63 }, { "epoch": 3.3684210526315788, "grad_norm": 0.27756112813949585, "learning_rate": 7.529411764705883e-06, "loss": 1.2938, "step": 64 }, { "epoch": 3.4210526315789473, "grad_norm": 0.2620949447154999, "learning_rate": 7.294117647058823e-06, "loss": 1.4932, "step": 65 }, { "epoch": 3.473684210526316, "grad_norm": 0.28503599762916565, "learning_rate": 7.058823529411766e-06, "loss": 1.5613, "step": 66 }, { "epoch": 3.526315789473684, "grad_norm": 0.26462414860725403, "learning_rate": 6.8235294117647065e-06, "loss": 1.3213, "step": 67 }, { "epoch": 3.5789473684210527, "grad_norm": 0.26519039273262024, "learning_rate": 6.588235294117647e-06, "loss": 1.4058, "step": 68 }, { "epoch": 3.6315789473684212, "grad_norm": 0.2812947928905487, "learning_rate": 6.352941176470589e-06, "loss": 1.1533, "step": 69 }, { "epoch": 3.6842105263157894, "grad_norm": 0.26864585280418396, "learning_rate": 6.11764705882353e-06, "loss": 1.2935, "step": 70 }, { "epoch": 3.736842105263158, "grad_norm": 0.27379095554351807, "learning_rate": 5.882352941176471e-06, "loss": 1.3692, "step": 71 }, { "epoch": 3.7894736842105265, "grad_norm": 0.2734774947166443, "learning_rate": 5.6470588235294125e-06, "loss": 1.3805, "step": 72 }, { "epoch": 3.8421052631578947, "grad_norm": 0.2809073328971863, "learning_rate": 5.411764705882353e-06, "loss": 1.3503, "step": 73 }, { "epoch": 3.8947368421052633, "grad_norm": 0.2657577097415924, "learning_rate": 5.176470588235295e-06, "loss": 1.1972, "step": 74 }, { "epoch": 3.9473684210526314, "grad_norm": 0.26183557510375977, "learning_rate": 4.941176470588236e-06, "loss": 1.3515, 
"step": 75 }, { "epoch": 4.0, "grad_norm": 0.29608941078186035, "learning_rate": 4.705882352941177e-06, "loss": 1.1138, "step": 76 }, { "epoch": 4.0, "eval_loss": 1.4253603219985962, "eval_runtime": 29.788, "eval_samples_per_second": 8.829, "eval_steps_per_second": 0.101, "step": 76 } ], "logging_steps": 1, "max_steps": 95, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.1 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.300432166000026e+17, "train_batch_size": 128, "trial_name": null, "trial_params": null }