{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.71111111111111,
  "eval_steps": 500,
  "global_step": 330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 4.49782157269768,
      "learning_rate": 9e-06,
      "loss": 0.8906,
      "step": 10
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 1.7319757135180889,
      "learning_rate": 9.71875e-06,
      "loss": 0.3413,
      "step": 20
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 1.3535304038871057,
      "learning_rate": 9.406250000000002e-06,
      "loss": 0.2847,
      "step": 30
    },
    {
      "epoch": 1.1777777777777778,
      "grad_norm": 1.1753491938410972,
      "learning_rate": 9.09375e-06,
      "loss": 0.2435,
      "step": 40
    },
    {
      "epoch": 1.474074074074074,
      "grad_norm": 1.3529820931928744,
      "learning_rate": 8.781250000000002e-06,
      "loss": 0.2002,
      "step": 50
    },
    {
      "epoch": 1.7703703703703704,
      "grad_norm": 1.2055625223119095,
      "learning_rate": 8.468750000000001e-06,
      "loss": 0.1946,
      "step": 60
    },
    {
      "epoch": 2.0592592592592593,
      "grad_norm": 0.9694081925518563,
      "learning_rate": 8.156250000000002e-06,
      "loss": 0.1785,
      "step": 70
    },
    {
      "epoch": 2.3555555555555556,
      "grad_norm": 1.2118570235556574,
      "learning_rate": 7.843750000000001e-06,
      "loss": 0.1136,
      "step": 80
    },
    {
      "epoch": 2.651851851851852,
      "grad_norm": 1.138977678286847,
      "learning_rate": 7.531250000000001e-06,
      "loss": 0.1115,
      "step": 90
    },
    {
      "epoch": 2.948148148148148,
      "grad_norm": 1.0036786958910326,
      "learning_rate": 7.218750000000001e-06,
      "loss": 0.1126,
      "step": 100
    },
    {
      "epoch": 3.237037037037037,
      "grad_norm": 0.8367560642733725,
      "learning_rate": 6.906250000000001e-06,
      "loss": 0.0728,
      "step": 110
    },
    {
      "epoch": 3.533333333333333,
      "grad_norm": 1.0509440799673675,
      "learning_rate": 6.593750000000001e-06,
      "loss": 0.0617,
      "step": 120
    },
    {
      "epoch": 3.8296296296296295,
      "grad_norm": 0.8960911427806355,
      "learning_rate": 6.281250000000001e-06,
      "loss": 0.0649,
      "step": 130
    },
    {
      "epoch": 4.118518518518519,
      "grad_norm": 0.7809646327713474,
      "learning_rate": 5.968750000000001e-06,
      "loss": 0.0526,
      "step": 140
    },
    {
      "epoch": 4.4148148148148145,
      "grad_norm": 0.8549249368670622,
      "learning_rate": 5.656250000000001e-06,
      "loss": 0.0335,
      "step": 150
    },
    {
      "epoch": 4.711111111111111,
      "grad_norm": 0.6945788960835679,
      "learning_rate": 5.343750000000001e-06,
      "loss": 0.0349,
      "step": 160
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.7841073342004105,
      "learning_rate": 5.031250000000001e-06,
      "loss": 0.0347,
      "step": 170
    },
    {
      "epoch": 5.296296296296296,
      "grad_norm": 0.6556195586614093,
      "learning_rate": 4.71875e-06,
      "loss": 0.0167,
      "step": 180
    },
    {
      "epoch": 5.592592592592593,
      "grad_norm": 0.6075821111027782,
      "learning_rate": 4.40625e-06,
      "loss": 0.0174,
      "step": 190
    },
    {
      "epoch": 5.888888888888889,
      "grad_norm": 0.6111587561787415,
      "learning_rate": 4.09375e-06,
      "loss": 0.0173,
      "step": 200
    },
    {
      "epoch": 6.177777777777778,
      "grad_norm": 0.40463595396228347,
      "learning_rate": 3.78125e-06,
      "loss": 0.0117,
      "step": 210
    },
    {
      "epoch": 6.474074074074074,
      "grad_norm": 0.39652000374030977,
      "learning_rate": 3.46875e-06,
      "loss": 0.0079,
      "step": 220
    },
    {
      "epoch": 6.770370370370371,
      "grad_norm": 0.4927512843479825,
      "learning_rate": 3.15625e-06,
      "loss": 0.0082,
      "step": 230
    },
    {
      "epoch": 7.059259259259259,
      "grad_norm": 0.36605662616703716,
      "learning_rate": 2.84375e-06,
      "loss": 0.0073,
      "step": 240
    },
    {
      "epoch": 7.355555555555555,
      "grad_norm": 0.2483553830101468,
      "learning_rate": 2.53125e-06,
      "loss": 0.0038,
      "step": 250
    },
    {
      "epoch": 7.651851851851852,
      "grad_norm": 0.28219727023639773,
      "learning_rate": 2.21875e-06,
      "loss": 0.0037,
      "step": 260
    },
    {
      "epoch": 7.948148148148148,
      "grad_norm": 0.4959724939070521,
      "learning_rate": 1.90625e-06,
      "loss": 0.0032,
      "step": 270
    },
    {
      "epoch": 8.237037037037037,
      "grad_norm": 0.29779702369903077,
      "learning_rate": 1.59375e-06,
      "loss": 0.0018,
      "step": 280
    },
    {
      "epoch": 8.533333333333333,
      "grad_norm": 0.20927856889654697,
      "learning_rate": 1.28125e-06,
      "loss": 0.0015,
      "step": 290
    },
    {
      "epoch": 8.829629629629629,
      "grad_norm": 0.09637630938977507,
      "learning_rate": 9.6875e-07,
      "loss": 0.0014,
      "step": 300
    },
    {
      "epoch": 9.118518518518519,
      "grad_norm": 0.060667953625655406,
      "learning_rate": 6.562500000000001e-07,
      "loss": 0.0011,
      "step": 310
    },
    {
      "epoch": 9.414814814814815,
      "grad_norm": 0.061392256534022145,
      "learning_rate": 3.4375000000000004e-07,
      "loss": 0.0007,
      "step": 320
    },
    {
      "epoch": 9.71111111111111,
      "grad_norm": 0.09163877069911756,
      "learning_rate": 3.1250000000000005e-08,
      "loss": 0.0008,
      "step": 330
    }
  ],
  "logging_steps": 10,
  "max_steps": 330,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 186149046386688.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}