phd2023 committed on
Commit
4f1ae5a
·
verified ·
1 Parent(s): a677e34

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/0aywf3kf)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/f0mu5ifb)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.08095254935324192,
4
- "train_runtime": 3111.6296,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.329,
7
- "train_steps_per_second": 0.003
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.05527676269412041,
4
+ "train_runtime": 960.6746,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 1.066,
7
+ "train_steps_per_second": 0.008
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa011080ea8fd2b7558e600b6571a347ba82ad94678d21d0c99f1ee62025d430
3
- size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b248d4c16c86a58d89a7fa1bd7202c679100b417f2147a8aa6826d332cf03b3
3
+ size 2624790528
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fbb0115be97acacbf3e674edbc8849e640000c855e49cd864884978c24d55fe
3
- size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ae15675479e5997187ac4e9c25e645d9187ac442197893b3a5ee18b868c4f1
3
+ size 24576
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba236cdf0b06691cfc6c893cde11d55e1103daff8bc2006fa7b34ea6e3fd11c2
3
- size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9b081d4695d71e0f71f2f0d21be0fd603083275831f1786a2935b4e51312e4
3
+ size 1579814912
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:34a148659fdf8720103ba2349beb78be69bcf48864884cabe1693c697778f6f5
3
- size 1089994880
 
 
 
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.08095254935324192,
4
- "train_runtime": 3111.6296,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.329,
7
- "train_steps_per_second": 0.003
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.05527676269412041,
4
+ "train_runtime": 960.6746,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 1.066,
7
+ "train_steps_per_second": 0.008
8
  }
trainer_state.json CHANGED
@@ -10,47 +10,47 @@
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
- "completion_length": 524.044677734375,
14
  "epoch": 0.125,
15
- "grad_norm": 0.7352403402328491,
16
  "kl": 0.0,
17
  "learning_rate": 3e-06,
18
- "loss": 0.0638,
19
- "reward": 0.7310268133878708,
20
- "reward_std": 0.38357019051909447,
21
- "rewards/accuracy_reward": 0.7310268133878708,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
- "completion_length": 550.0812196731567,
28
  "epoch": 0.625,
29
- "grad_norm": 248.55787658691406,
30
- "kl": 0.024899780750274658,
31
  "learning_rate": 1.1662185990655286e-06,
32
- "loss": 0.0429,
33
- "reward": 0.742187537252903,
34
- "reward_std": 0.3619732800871134,
35
- "rewards/accuracy_reward": 0.7419085185974836,
36
- "rewards/format_reward": 0.00027901786961592734,
37
  "step": 5
38
  },
39
  {
40
  "clip_ratio": 0.0,
41
- "completion_length": 538.6674270629883,
42
  "epoch": 1.0,
43
- "kl": 0.012386957804361979,
44
- "reward": 0.8236607511838278,
45
- "reward_std": 0.2518529687076807,
46
- "rewards/accuracy_reward": 0.8236607511838278,
47
  "rewards/format_reward": 0.0,
48
  "step": 8,
49
  "total_flos": 0.0,
50
- "train_loss": 0.08095254935324192,
51
- "train_runtime": 3111.6296,
52
- "train_samples_per_second": 0.329,
53
- "train_steps_per_second": 0.003
54
  }
55
  ],
56
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
+ "completion_length": 535.0468978881836,
14
  "epoch": 0.125,
15
+ "grad_norm": 1.6275352239608765,
16
  "kl": 0.0,
17
  "learning_rate": 3e-06,
18
+ "loss": 0.0685,
19
+ "reward": 0.770089328289032,
20
+ "reward_std": 0.3429133668541908,
21
+ "rewards/accuracy_reward": 0.770089328289032,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
+ "completion_length": 555.9333419799805,
28
  "epoch": 0.625,
29
+ "grad_norm": 3.0009214878082275,
30
+ "kl": 0.002394258975982666,
31
  "learning_rate": 1.1662185990655286e-06,
32
+ "loss": 0.0451,
33
+ "reward": 0.7642299458384514,
34
+ "reward_std": 0.3406056701205671,
35
+ "rewards/accuracy_reward": 0.7642299458384514,
36
+ "rewards/format_reward": 0.0,
37
  "step": 5
38
  },
39
  {
40
  "clip_ratio": 0.0,
41
+ "completion_length": 559.236998240153,
42
  "epoch": 1.0,
43
+ "kl": 0.010375022888183594,
44
+ "reward": 0.8098958681027094,
45
+ "reward_std": 0.27716156281530857,
46
+ "rewards/accuracy_reward": 0.8098958681027094,
47
  "rewards/format_reward": 0.0,
48
  "step": 8,
49
  "total_flos": 0.0,
50
+ "train_loss": 0.05527676269412041,
51
+ "train_runtime": 960.6746,
52
+ "train_samples_per_second": 1.066,
53
+ "train_steps_per_second": 0.008
54
  }
55
  ],
56
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:567ac96f7babeebd1829d4da813e9cb71f4a55b64528992b0586448c2a37a72e
3
- size 7992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c6f6b35b6beb62a7df7622d585484daf12e3aa05d722d95db2ec6ae5a6737cf
3
+ size 8120