| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9956122856003191, | |
| "eval_steps": 500, | |
| "global_step": 117, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008509506714532641, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.8719, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.017019013429065283, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.8402, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.025528520143597924, | |
| "learning_rate": 1e-05, | |
| "loss": 0.8438, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.034038026858130566, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.8171, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.04254753357266321, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.8029, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.05105704028719585, | |
| "learning_rate": 2e-05, | |
| "loss": 0.7858, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0595665470017285, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.7671, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.06807605371626113, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.7989, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.07658556043079377, | |
| "learning_rate": 3e-05, | |
| "loss": 0.7546, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.08509506714532641, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.745, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09360457385985906, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.7169, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.1021140805743917, | |
| "learning_rate": 4e-05, | |
| "loss": 0.7561, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.11062358728892434, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 0.8475, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.119133094003457, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.9401, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.12764260071798963, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8515, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.13615210743252226, | |
| "learning_rate": 4.9509803921568634e-05, | |
| "loss": 0.8282, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.14466161414705492, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 0.7526, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.15317112086158755, | |
| "learning_rate": 4.8529411764705885e-05, | |
| "loss": 0.709, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1616806275761202, | |
| "learning_rate": 4.803921568627452e-05, | |
| "loss": 0.7005, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.17019013429065283, | |
| "learning_rate": 4.7549019607843135e-05, | |
| "loss": 0.6978, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.17869964100518548, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 0.6689, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.1872091477197181, | |
| "learning_rate": 4.656862745098039e-05, | |
| "loss": 0.7231, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.19571865443425077, | |
| "learning_rate": 4.607843137254902e-05, | |
| "loss": 0.6685, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.2042281611487834, | |
| "learning_rate": 4.558823529411765e-05, | |
| "loss": 0.6776, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.21273766786331605, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "loss": 0.6853, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.22124717457784868, | |
| "learning_rate": 4.460784313725491e-05, | |
| "loss": 0.7705, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.22975668129238133, | |
| "learning_rate": 4.411764705882353e-05, | |
| "loss": 0.7922, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.238266188006914, | |
| "learning_rate": 4.362745098039216e-05, | |
| "loss": 0.6948, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.24677569472144661, | |
| "learning_rate": 4.313725490196079e-05, | |
| "loss": 0.6748, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.25528520143597927, | |
| "learning_rate": 4.2647058823529415e-05, | |
| "loss": 0.7134, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.2637947081505119, | |
| "learning_rate": 4.215686274509804e-05, | |
| "loss": 0.6921, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.2723042148650445, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.6839, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2808137215795772, | |
| "learning_rate": 4.11764705882353e-05, | |
| "loss": 0.6808, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.28932322829410984, | |
| "learning_rate": 4.068627450980392e-05, | |
| "loss": 0.6939, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2978327350086425, | |
| "learning_rate": 4.0196078431372555e-05, | |
| "loss": 0.6846, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.3063422417231751, | |
| "learning_rate": 3.970588235294117e-05, | |
| "loss": 0.6456, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.31485174843770775, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 0.6725, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.3233612551522404, | |
| "learning_rate": 3.872549019607844e-05, | |
| "loss": 0.682, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.33187076186677306, | |
| "learning_rate": 3.8235294117647055e-05, | |
| "loss": 0.7029, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.34038026858130566, | |
| "learning_rate": 3.774509803921569e-05, | |
| "loss": 0.7072, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3488897752958383, | |
| "learning_rate": 3.725490196078432e-05, | |
| "loss": 0.6925, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.35739928201037097, | |
| "learning_rate": 3.6764705882352945e-05, | |
| "loss": 0.6642, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3659087887249036, | |
| "learning_rate": 3.627450980392157e-05, | |
| "loss": 0.6483, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.3744182954394362, | |
| "learning_rate": 3.5784313725490195e-05, | |
| "loss": 0.6779, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.3829278021539689, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 0.6664, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.39143730886850153, | |
| "learning_rate": 3.480392156862745e-05, | |
| "loss": 0.6493, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.3999468155830342, | |
| "learning_rate": 3.431372549019608e-05, | |
| "loss": 0.6853, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.4084563222975668, | |
| "learning_rate": 3.382352941176471e-05, | |
| "loss": 0.6549, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.41696582901209944, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.6659, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.4254753357266321, | |
| "learning_rate": 3.284313725490196e-05, | |
| "loss": 0.6639, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.43398484244116475, | |
| "learning_rate": 3.235294117647059e-05, | |
| "loss": 0.6583, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.44249434915569735, | |
| "learning_rate": 3.186274509803922e-05, | |
| "loss": 0.6585, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.45100385587023, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": 0.6753, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.45951336258476266, | |
| "learning_rate": 3.0882352941176475e-05, | |
| "loss": 0.6712, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.4680228692992953, | |
| "learning_rate": 3.0392156862745097e-05, | |
| "loss": 0.6632, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.476532376013828, | |
| "learning_rate": 2.9901960784313725e-05, | |
| "loss": 0.6673, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.4850418827283606, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": 0.6444, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.49355138944289323, | |
| "learning_rate": 2.8921568627450986e-05, | |
| "loss": 0.6673, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.5020608961574259, | |
| "learning_rate": 2.8431372549019608e-05, | |
| "loss": 0.6629, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.5105704028719585, | |
| "learning_rate": 2.7941176470588236e-05, | |
| "loss": 0.6723, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5190799095864912, | |
| "learning_rate": 2.7450980392156865e-05, | |
| "loss": 0.6622, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.5275894163010239, | |
| "learning_rate": 2.696078431372549e-05, | |
| "loss": 0.6606, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.5360989230155564, | |
| "learning_rate": 2.647058823529412e-05, | |
| "loss": 0.6823, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.544608429730089, | |
| "learning_rate": 2.5980392156862747e-05, | |
| "loss": 0.6519, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.5531179364446217, | |
| "learning_rate": 2.5490196078431373e-05, | |
| "loss": 0.6702, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.5616274431591544, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.6463, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.570136949873687, | |
| "learning_rate": 2.4509803921568626e-05, | |
| "loss": 0.6604, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.5786464565882197, | |
| "learning_rate": 2.401960784313726e-05, | |
| "loss": 0.6535, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.5871559633027523, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 0.6605, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.595665470017285, | |
| "learning_rate": 2.303921568627451e-05, | |
| "loss": 0.6514, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6041749767318175, | |
| "learning_rate": 2.2549019607843138e-05, | |
| "loss": 0.6533, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.6126844834463502, | |
| "learning_rate": 2.2058823529411766e-05, | |
| "loss": 0.6564, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.6211939901608828, | |
| "learning_rate": 2.1568627450980395e-05, | |
| "loss": 0.6679, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.6297034968754155, | |
| "learning_rate": 2.107843137254902e-05, | |
| "loss": 0.6476, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.6382130035899481, | |
| "learning_rate": 2.058823529411765e-05, | |
| "loss": 0.6917, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.6467225103044808, | |
| "learning_rate": 2.0098039215686277e-05, | |
| "loss": 0.6565, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.6552320170190135, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": 0.6329, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.6637415237335461, | |
| "learning_rate": 1.9117647058823528e-05, | |
| "loss": 0.6149, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.6722510304480788, | |
| "learning_rate": 1.862745098039216e-05, | |
| "loss": 0.6799, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.6807605371626113, | |
| "learning_rate": 1.8137254901960785e-05, | |
| "loss": 0.6458, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.689270043877144, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 0.6376, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.6977795505916766, | |
| "learning_rate": 1.715686274509804e-05, | |
| "loss": 0.6688, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.7062890573062093, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.6466, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.7147985640207419, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": 0.6386, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.7233080707352746, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": 0.6427, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.7318175774498072, | |
| "learning_rate": 1.5196078431372548e-05, | |
| "loss": 0.6617, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.7403270841643399, | |
| "learning_rate": 1.4705882352941177e-05, | |
| "loss": 0.6573, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.7488365908788724, | |
| "learning_rate": 1.4215686274509804e-05, | |
| "loss": 0.6342, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.7573460975934051, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": 0.6455, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.7658556043079378, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": 0.6042, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.7743651110224704, | |
| "learning_rate": 1.2745098039215686e-05, | |
| "loss": 0.639, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.7828746177370031, | |
| "learning_rate": 1.2254901960784313e-05, | |
| "loss": 0.6496, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.7913841244515357, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 0.6474, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.7998936311660684, | |
| "learning_rate": 1.1274509803921569e-05, | |
| "loss": 0.6418, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.808403137880601, | |
| "learning_rate": 1.0784313725490197e-05, | |
| "loss": 0.6434, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.8169126445951336, | |
| "learning_rate": 1.0294117647058824e-05, | |
| "loss": 0.659, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.8254221513096662, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 0.6342, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.8339316580241989, | |
| "learning_rate": 9.31372549019608e-06, | |
| "loss": 0.647, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.8424411647387315, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 0.6306, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.8509506714532642, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.6724, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8594601781677969, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 0.6455, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.8679696848823295, | |
| "learning_rate": 7.3529411764705884e-06, | |
| "loss": 0.634, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.8764791915968622, | |
| "learning_rate": 6.862745098039216e-06, | |
| "loss": 0.6353, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.8849886983113947, | |
| "learning_rate": 6.372549019607843e-06, | |
| "loss": 0.6632, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.8934982050259274, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.6616, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.90200771174046, | |
| "learning_rate": 5.392156862745099e-06, | |
| "loss": 0.6312, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.9105172184549927, | |
| "learning_rate": 4.901960784313726e-06, | |
| "loss": 0.6592, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.9190267251695253, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 0.6634, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.927536231884058, | |
| "learning_rate": 3.92156862745098e-06, | |
| "loss": 0.6428, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.9360457385985906, | |
| "learning_rate": 3.431372549019608e-06, | |
| "loss": 0.6261, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.9445552453131233, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 0.6425, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.953064752027656, | |
| "learning_rate": 2.450980392156863e-06, | |
| "loss": 0.6614, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.9615742587421885, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": 0.6545, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.9700837654567211, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 0.6184, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.9785932721712538, | |
| "learning_rate": 9.80392156862745e-07, | |
| "loss": 0.6671, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.9871027788857865, | |
| "learning_rate": 4.901960784313725e-07, | |
| "loss": 0.65, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.9956122856003191, | |
| "learning_rate": 0.0, | |
| "loss": 0.6446, | |
| "step": 117 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 117, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.689042619882799e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |