Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/0aywf3kf)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/f0mu5ifb)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.08095254935324192,
-    "train_runtime": 3111.6296,
     "train_samples": 7500,
-    "train_samples_per_second": 0.329,
-    "train_steps_per_second": 0.003
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.05527676269412041,
+    "train_runtime": 960.6746,
     "train_samples": 7500,
+    "train_samples_per_second": 1.066,
+    "train_steps_per_second": 0.008
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa011080ea8fd2b7558e600b6571a347ba82ad94678d21d0c99f1ee62025d430
-size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b248d4c16c86a58d89a7fa1bd7202c679100b417f2147a8aa6826d332cf03b3
+size 2624790528

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fbb0115be97acacbf3e674edbc8849e640000c855e49cd864884978c24d55fe
-size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:63ae15675479e5997187ac4e9c25e645d9187ac442197893b3a5ee18b868c4f1
+size 24576

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba236cdf0b06691cfc6c893cde11d55e1103daff8bc2006fa7b34ea6e3fd11c2
-size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e9b081d4695d71e0f71f2f0d21be0fd603083275831f1786a2935b4e51312e4
+size 1579814912

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:34a148659fdf8720103ba2349beb78be69bcf48864884cabe1693c697778f6f5
-size 1089994880

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.08095254935324192,
-    "train_runtime": 3111.6296,
     "train_samples": 7500,
-    "train_samples_per_second": 0.329,
-    "train_steps_per_second": 0.003
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.05527676269412041,
+    "train_runtime": 960.6746,
     "train_samples": 7500,
+    "train_samples_per_second": 1.066,
+    "train_steps_per_second": 0.008
 }

trainer_state.json CHANGED Viewed

@@ -10,47 +10,47 @@
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length": 524.044677734375,
       "epoch": 0.125,
-      "grad_norm": 0.7352403402328491,
       "kl": 0.0,
       "learning_rate": 3e-06,
-      "loss": 0.0638,
-      "reward": 0.7310268133878708,
-      "reward_std": 0.38357019051909447,
-      "rewards/accuracy_reward": 0.7310268133878708,
       "rewards/format_reward": 0.0,
       "step": 1
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 550.0812196731567,
       "epoch": 0.625,
-      "grad_norm": 248.55787658691406,
-      "kl": 0.024899780750274658,
       "learning_rate": 1.1662185990655286e-06,
-      "loss": 0.0429,
-      "reward": 0.742187537252903,
-      "reward_std": 0.3619732800871134,
-      "rewards/accuracy_reward": 0.7419085185974836,
-      "rewards/format_reward": 0.00027901786961592734,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 538.6674270629883,
       "epoch": 1.0,
-      "kl": 0.012386957804361979,
-      "reward": 0.8236607511838278,
-      "reward_std": 0.2518529687076807,
-      "rewards/accuracy_reward": 0.8236607511838278,
       "rewards/format_reward": 0.0,
       "step": 8,
       "total_flos": 0.0,
-      "train_loss": 0.08095254935324192,
-      "train_runtime": 3111.6296,
-      "train_samples_per_second": 0.329,
-      "train_steps_per_second": 0.003
     }
   ],
   "logging_steps": 5,

   "log_history": [
     {
       "clip_ratio": 0.0,
+      "completion_length": 535.0468978881836,
       "epoch": 0.125,
+      "grad_norm": 1.6275352239608765,
       "kl": 0.0,
       "learning_rate": 3e-06,
+      "loss": 0.0685,
+      "reward": 0.770089328289032,
+      "reward_std": 0.3429133668541908,
+      "rewards/accuracy_reward": 0.770089328289032,
       "rewards/format_reward": 0.0,
       "step": 1
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 555.9333419799805,
       "epoch": 0.625,
+      "grad_norm": 3.0009214878082275,
+      "kl": 0.002394258975982666,
       "learning_rate": 1.1662185990655286e-06,
+      "loss": 0.0451,
+      "reward": 0.7642299458384514,
+      "reward_std": 0.3406056701205671,
+      "rewards/accuracy_reward": 0.7642299458384514,
+      "rewards/format_reward": 0.0,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 559.236998240153,
       "epoch": 1.0,
+      "kl": 0.010375022888183594,
+      "reward": 0.8098958681027094,
+      "reward_std": 0.27716156281530857,
+      "rewards/accuracy_reward": 0.8098958681027094,
       "rewards/format_reward": 0.0,
       "step": 8,
       "total_flos": 0.0,
+      "train_loss": 0.05527676269412041,
+      "train_runtime": 960.6746,
+      "train_samples_per_second": 1.066,
+      "train_steps_per_second": 0.008
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:567ac96f7babeebd1829d4da813e9cb71f4a55b64528992b0586448c2a37a72e
-size 7992

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c6f6b35b6beb62a7df7622d585484daf12e3aa05d722d95db2ec6ae5a6737cf
+size 8120