{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9963369963369964,
  "eval_steps": 1000,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004884004884004884,
      "grad_norm": 2.3243459220572165,
      "learning_rate": 2.3809523809523807e-08,
      "logits/chosen": -2.550273895263672,
      "logits/rejected": -2.5806894302368164,
      "logps/chosen": -424.7008056640625,
      "logps/rejected": -390.49554443359375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.04884004884004884,
      "grad_norm": 2.4712584301903604,
      "learning_rate": 2.3809523809523806e-07,
      "logits/chosen": -2.4481005668640137,
      "logits/rejected": -2.474926471710205,
      "logps/chosen": -395.8595886230469,
      "logps/rejected": -384.5038146972656,
      "loss": 0.6931,
      "rewards/accuracies": 0.4635416567325592,
      "rewards/chosen": 0.00013807932555209845,
      "rewards/margins": 0.0004333473916631192,
      "rewards/rejected": -0.00029526810976676643,
      "step": 10
    },
    {
      "epoch": 0.09768009768009768,
      "grad_norm": 2.318089253747947,
      "learning_rate": 4.761904761904761e-07,
      "logits/chosen": -2.445664167404175,
      "logits/rejected": -2.4723546504974365,
      "logps/chosen": -393.4665222167969,
      "logps/rejected": -377.8502197265625,
      "loss": 0.6905,
      "rewards/accuracies": 0.67578125,
      "rewards/chosen": 0.00608012406155467,
      "rewards/margins": 0.005808630492538214,
      "rewards/rejected": 0.00027149339439347386,
      "step": 20
    },
    {
      "epoch": 0.14652014652014653,
      "grad_norm": 2.5283415680520225,
      "learning_rate": 4.970219740227693e-07,
      "logits/chosen": -2.494197368621826,
      "logits/rejected": -2.5383658409118652,
      "logps/chosen": -393.2831115722656,
      "logps/rejected": -383.0456237792969,
      "loss": 0.6754,
      "rewards/accuracies": 0.807812511920929,
      "rewards/chosen": 0.03202961012721062,
      "rewards/margins": 0.037289537489414215,
      "rewards/rejected": -0.005259926896542311,
      "step": 30
    },
    {
      "epoch": 0.19536019536019536,
      "grad_norm": 2.5364879915405267,
      "learning_rate": 4.868186180746791e-07,
      "logits/chosen": -2.5153324604034424,
      "logits/rejected": -2.5360398292541504,
      "logps/chosen": -387.1582946777344,
      "logps/rejected": -379.3692932128906,
      "loss": 0.647,
      "rewards/accuracies": 0.8218749761581421,
      "rewards/chosen": 0.0628650039434433,
      "rewards/margins": 0.09538714587688446,
      "rewards/rejected": -0.032522134482860565,
      "step": 40
    },
    {
      "epoch": 0.2442002442002442,
      "grad_norm": 2.2699251495146964,
      "learning_rate": 4.6965306126428705e-07,
      "logits/chosen": -2.539130449295044,
      "logits/rejected": -2.5619969367980957,
      "logps/chosen": -404.4756774902344,
      "logps/rejected": -406.6902770996094,
      "loss": 0.5925,
      "rewards/accuracies": 0.82421875,
      "rewards/chosen": -0.0003643702657427639,
      "rewards/margins": 0.2586084008216858,
      "rewards/rejected": -0.25897279381752014,
      "step": 50
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 2.4145672311111994,
      "learning_rate": 4.460299516441776e-07,
      "logits/chosen": -2.550515651702881,
      "logits/rejected": -2.577197551727295,
      "logps/chosen": -413.14947509765625,
      "logps/rejected": -442.47674560546875,
      "loss": 0.5417,
      "rewards/accuracies": 0.801562488079071,
      "rewards/chosen": -0.15052883327007294,
      "rewards/margins": 0.4667808413505554,
      "rewards/rejected": -0.6173096895217896,
      "step": 60
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 2.7821486929434056,
      "learning_rate": 4.166437820523908e-07,
      "logits/chosen": -2.5242340564727783,
      "logits/rejected": -2.5425312519073486,
      "logps/chosen": -446.6337890625,
      "logps/rejected": -487.9483337402344,
      "loss": 0.5011,
      "rewards/accuracies": 0.8023437261581421,
      "rewards/chosen": -0.4636126160621643,
      "rewards/margins": 0.6593486070632935,
      "rewards/rejected": -1.1229612827301025,
      "step": 70
    },
    {
      "epoch": 0.3907203907203907,
      "grad_norm": 2.754390105851781,
      "learning_rate": 3.8235847280454626e-07,
      "logits/chosen": -2.4175186157226562,
      "logits/rejected": -2.449018955230713,
      "logps/chosen": -507.10357666015625,
      "logps/rejected": -592.0707397460938,
      "loss": 0.4575,
      "rewards/accuracies": 0.80078125,
      "rewards/chosen": -1.1133525371551514,
      "rewards/margins": 0.9770663380622864,
      "rewards/rejected": -2.090418815612793,
      "step": 80
    },
    {
      "epoch": 0.43956043956043955,
      "grad_norm": 2.6724675179161568,
      "learning_rate": 3.4418197340879627e-07,
      "logits/chosen": -2.409747838973999,
      "logits/rejected": -2.4172959327697754,
      "logps/chosen": -507.91876220703125,
      "logps/rejected": -625.10205078125,
      "loss": 0.4121,
      "rewards/accuracies": 0.835156261920929,
      "rewards/chosen": -1.0922380685806274,
      "rewards/margins": 1.3621976375579834,
      "rewards/rejected": -2.4544358253479004,
      "step": 90
    },
    {
      "epoch": 0.4884004884004884,
      "grad_norm": 3.151335337005712,
      "learning_rate": 3.032366299846039e-07,
      "logits/chosen": -2.4340109825134277,
      "logits/rejected": -2.4465105533599854,
      "logps/chosen": -532.0742797851562,
      "logps/rejected": -683.5274658203125,
      "loss": 0.3892,
      "rewards/accuracies": 0.8335937261581421,
      "rewards/chosen": -1.2753849029541016,
      "rewards/margins": 1.6482696533203125,
      "rewards/rejected": -2.923654794692993,
      "step": 100
    },
    {
      "epoch": 0.5372405372405372,
      "grad_norm": 2.717335654672678,
      "learning_rate": 2.6072618954988863e-07,
      "logits/chosen": -2.4394848346710205,
      "logits/rejected": -2.442568778991699,
      "logps/chosen": -518.7210693359375,
      "logps/rejected": -677.293701171875,
      "loss": 0.3834,
      "rewards/accuracies": 0.827343761920929,
      "rewards/chosen": -1.2031551599502563,
      "rewards/margins": 1.6999378204345703,
      "rewards/rejected": -2.903092861175537,
      "step": 110
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 3.1603927594667005,
      "learning_rate": 2.1790041121336222e-07,
      "logits/chosen": -2.4521875381469727,
      "logits/rejected": -2.460845470428467,
      "logps/chosen": -532.1248168945312,
      "logps/rejected": -704.0490112304688,
      "loss": 0.3613,
      "rewards/accuracies": 0.8414062261581421,
      "rewards/chosen": -1.3992774486541748,
      "rewards/margins": 1.8053524494171143,
      "rewards/rejected": -3.204629898071289,
      "step": 120
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 2.877029930356179,
      "learning_rate": 1.7601832466317766e-07,
      "logits/chosen": -2.4438443183898926,
      "logits/rejected": -2.462118148803711,
      "logps/chosen": -540.3773193359375,
      "logps/rejected": -711.51416015625,
      "loss": 0.3581,
      "rewards/accuracies": 0.8453124761581421,
      "rewards/chosen": -1.4280272722244263,
      "rewards/margins": 1.8352330923080444,
      "rewards/rejected": -3.2632603645324707,
      "step": 130
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 2.8365028089984454,
      "learning_rate": 1.3631121611097362e-07,
      "logits/chosen": -2.4740078449249268,
      "logits/rejected": -2.487417697906494,
      "logps/chosen": -546.05859375,
      "logps/rejected": -727.7886962890625,
      "loss": 0.3495,
      "rewards/accuracies": 0.8609374761581421,
      "rewards/chosen": -1.4827759265899658,
      "rewards/margins": 1.9239017963409424,
      "rewards/rejected": -3.4066779613494873,
      "step": 140
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 3.083741716442478,
      "learning_rate": 9.9946429862908e-08,
      "logits/chosen": -2.462756633758545,
      "logits/rejected": -2.4654526710510254,
      "logps/chosen": -549.0475463867188,
      "logps/rejected": -722.2012329101562,
      "loss": 0.3454,
      "rewards/accuracies": 0.8492187261581421,
      "rewards/chosen": -1.5003674030303955,
      "rewards/margins": 1.9490848779678345,
      "rewards/rejected": -3.4494519233703613,
      "step": 150
    },
    {
      "epoch": 0.7814407814407814,
      "grad_norm": 3.086204461780561,
      "learning_rate": 6.799304971075381e-08,
      "logits/chosen": -2.4620633125305176,
      "logits/rejected": -2.4670565128326416,
      "logps/chosen": -539.4750366210938,
      "logps/rejected": -717.6760864257812,
      "loss": 0.3426,
      "rewards/accuracies": 0.859375,
      "rewards/chosen": -1.4655063152313232,
      "rewards/margins": 1.9486806392669678,
      "rewards/rejected": -3.41418719291687,
      "step": 160
    },
    {
      "epoch": 0.8302808302808303,
      "grad_norm": 3.152043777770028,
      "learning_rate": 4.1390469071538175e-08,
      "logits/chosen": -2.4839229583740234,
      "logits/rejected": -2.4979355335235596,
      "logps/chosen": -547.6788940429688,
      "logps/rejected": -723.7260131835938,
      "loss": 0.3417,
      "rewards/accuracies": 0.839062511920929,
      "rewards/chosen": -1.532496690750122,
      "rewards/margins": 1.8789927959442139,
      "rewards/rejected": -3.411489486694336,
      "step": 170
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 3.34422267800285,
      "learning_rate": 2.0920773878248837e-08,
      "logits/chosen": -2.4812464714050293,
      "logits/rejected": -2.49059796333313,
      "logps/chosen": -556.6783447265625,
      "logps/rejected": -743.0768432617188,
      "loss": 0.3413,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -1.5343992710113525,
      "rewards/margins": 2.0101265907287598,
      "rewards/rejected": -3.5445258617401123,
      "step": 180
    },
    {
      "epoch": 0.927960927960928,
      "grad_norm": 3.2655151670502574,
      "learning_rate": 7.185750133542168e-09,
      "logits/chosen": -2.473402500152588,
      "logits/rejected": -2.471391201019287,
      "logps/chosen": -554.5660400390625,
      "logps/rejected": -741.9495849609375,
      "loss": 0.3343,
      "rewards/accuracies": 0.8539062738418579,
      "rewards/chosen": -1.5347990989685059,
      "rewards/margins": 2.0656068325042725,
      "rewards/rejected": -3.6004059314727783,
      "step": 190
    },
    {
      "epoch": 0.9768009768009768,
      "grad_norm": 4.051209744645471,
      "learning_rate": 5.891920784984184e-10,
      "logits/chosen": -2.456406354904175,
      "logits/rejected": -2.4556210041046143,
      "logps/chosen": -550.1729125976562,
      "logps/rejected": -737.451904296875,
      "loss": 0.3391,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -1.517017126083374,
      "rewards/margins": 2.044365406036377,
      "rewards/rejected": -3.561382293701172,
      "step": 200
    },
    {
      "epoch": 0.9963369963369964,
      "step": 204,
      "total_flos": 0.0,
      "train_loss": 0.4527332771058176,
      "train_runtime": 5381.1516,
      "train_samples_per_second": 38.96,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 204,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}