Joosep Pata
committed on
Commit
·
c0bca33
1
Parent(s):
baf0aea
update readme
Browse files
- clic/clusters/v2.3.0/README.md +5 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/.gitattributes +4 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/hyperparameters.json +1 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/model_kwargs.pkl +3 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/overridden_config.yaml +227 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/runs/train/events.out.tfevents.1741957152.workergpu072.934288.0 +3 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/runs/valid/events.out.tfevents.1741957152.workergpu072.934288.1 +3 -0
- clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/train-config.yaml +227 -0
clic/clusters/v2.3.0/README.md
CHANGED
|
@@ -9,4 +9,9 @@ pyg-clic_20250209_100514_187330 - transformer + flash attention, 4M events from
|
|
| 9 |
pyg-clic_20250130_214007_333962 - transformer + flash attention, full dataset, 10 epochs / ~80 hours, 1st run
|
| 10 |
pyg-clic_20250306_105311_290722 - transformer + flash attention, full dataset, 10 epochs / ~80 hours, 2nd run
|
| 11 |
pyg-clic_20250309_173756_957486 - transformer + flash attention, full dataset, 10 epochs / ~80 hours, 3rd run
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
```
|
|
|
|
| 9 |
pyg-clic_20250130_214007_333962 - transformer + flash attention, full dataset, 10 epochs / ~80 hours, 1st run
|
| 10 |
pyg-clic_20250306_105311_290722 - transformer + flash attention, full dataset, 10 epochs / ~80 hours, 2nd run
|
| 11 |
pyg-clic_20250309_173756_957486 - transformer + flash attention, full dataset, 10 epochs / ~80 hours, 3rd run
|
| 12 |
+
|
| 13 |
+
#multi-GPU tests
|
| 14 |
+
largebatch_study_gpus4_notscaledLR0.0001_epochs30_bsm256_adamw_a100_cu124_fulldataset_pyg-clic-v230_20250219_055135_172489 - just run on 4x GPUs
|
| 15 |
+
largebatch_study_gpus4_linearscaledLR0.0004_epochs30_bsm256_adamw_a100_cu124_fulldataset_pyg-clic-v230_20250217_082738_406721 - run on 4x GPUs, scale learning rate by 4x
|
| 16 |
+
largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888 - run on 4x GPUs, scale learning rate by 4x, scale weight decay by 3x
|
| 17 |
```
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/.gitattributes
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
plots_checkpoint*/** filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
preds_checkpoint*/** filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
runs/** filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
checkpoints/** filter=lfs diff=lfs merge=lfs -text
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/hyperparameters.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"num_mlpf_params": 52630547, "checkpoint_freq": 1, "comet": true, "comet_name": "particleflow-pt", "comet_offline": false, "comet_step_freq": 1000, "conv_type": "attention", "data_dir": "/mnt/ceph/users/ewulff/tensorflow_datasets/clic", "dataset": "clic", "dtype": "bfloat16", "enabled_test_datasets": ["clic_edm_qq_pf"], "finetune": null, "gpu_batch_multiplier": 256, "gpus": 4, "load": null, "lr": 0.0004, "optimizer": "adamw", "weight_decay": 0.03, "lr_schedule": "cosinedecay", "lr_schedule_config": {"onecycle": {"pct_start": 0.3}}, "make_plots": null, "model": {"attention": {"activation": "relu", "attention_type": "flash", "conv_type": "attention", "dropout_conv_id_ff": 0.0, "dropout_conv_id_mha": 0.0, "dropout_conv_reg_ff": 0.0, "dropout_conv_reg_mha": 0.0, "dropout_ff": 0.0, "head_dim": 32, "num_convs": 3, "num_heads": 32, "use_pre_layernorm": true}, "cos_phi_mode": "linear", "energy_mode": "direct-elemtype-split", "eta_mode": "linear", "gnn_lsh": {"activation": "elu", "bin_size": 32, "conv_type": "gnn_lsh", "distance_dim": 128, "embedding_dim": 512, "ffn_dist_hidden_dim": 128, "ffn_dist_num_layers": 2, "layernorm": true, "max_num_bins": 200, "num_convs": 8, "num_node_messages": 2, "width": 512}, "input_encoding": "split", "learned_representation_mode": "last", "mamba": {"activation": "elu", "conv_type": "mamba", "d_conv": 4, "d_state": 16, "dropout": 0.0, "embedding_dim": 128, "expand": 2, "num_convs": 2, "num_heads": 2, "width": 128}, "pt_mode": "direct-elemtype-split", "sin_phi_mode": "linear", "trainable": "all"}, "ntest": 2000, "ntrain": null, "num_epochs": 10, "num_workers": 12, "nvalid": null, "patience": 20, "prefetch_factor": 100, "raytune": {"asha": {"brackets": 1, "grace_period": 4, "max_t": 200, "reduction_factor": 4}, "default_metric": "val_loss", "default_mode": "min", "hyperband": {"max_t": 200, "reduction_factor": 4}, "hyperopt": {"n_random_steps": 10}, "local_dir": "/mnt/ceph/users/ewulff/ray_results", "nevergrad": {"n_random_steps": 10}, 
"sched": null, "search_alg": null}, "save_attention": true, "sort_data": false, "test": null, "test_dataset": {"clic_edm_qq_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}, "clic_edm_ttbar_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}, "clic_edm_ww_fullhad_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}}, "train": true, "train_dataset": {"clic": {"physical": {"batch_size": 1, "samples": {"clic_edm_qq_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}, "clic_edm_ttbar_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}, "clic_edm_ww_fullhad_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}}}}}, "val_freq": null, "valid_dataset": {"clic": {"physical": {"batch_size": 1, "samples": {"clic_edm_qq_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}, "clic_edm_ttbar_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}, "clic_edm_ww_fullhad_pf": {"splits": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "version": "2.5.0"}}}}}}
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/model_kwargs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08d9fedbafe13195772b70373f3ad6d7dcff24c98624585cc27f59b557bb5ae7
|
| 3 |
+
size 553
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/overridden_config.yaml
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoint_freq: 1
|
| 2 |
+
comet: true
|
| 3 |
+
comet_name: particleflow-pt
|
| 4 |
+
comet_offline: false
|
| 5 |
+
comet_step_freq: 1000
|
| 6 |
+
conv_type: attention
|
| 7 |
+
data_dir: /mnt/ceph/users/ewulff/tensorflow_datasets/clic
|
| 8 |
+
dataset: clic
|
| 9 |
+
dtype: bfloat16
|
| 10 |
+
enabled_test_datasets:
|
| 11 |
+
- clic_edm_qq_pf
|
| 12 |
+
finetune: null
|
| 13 |
+
gpu_batch_multiplier: 256
|
| 14 |
+
gpus: 4
|
| 15 |
+
load: null
|
| 16 |
+
lr: 0.0004
|
| 17 |
+
lr_schedule: cosinedecay
|
| 18 |
+
lr_schedule_config:
|
| 19 |
+
onecycle:
|
| 20 |
+
pct_start: 0.3
|
| 21 |
+
make_plots: null
|
| 22 |
+
model:
|
| 23 |
+
attention:
|
| 24 |
+
activation: relu
|
| 25 |
+
attention_type: flash
|
| 26 |
+
conv_type: attention
|
| 27 |
+
dropout_conv_id_ff: 0.0
|
| 28 |
+
dropout_conv_id_mha: 0.0
|
| 29 |
+
dropout_conv_reg_ff: 0.0
|
| 30 |
+
dropout_conv_reg_mha: 0.0
|
| 31 |
+
dropout_ff: 0.0
|
| 32 |
+
head_dim: 32
|
| 33 |
+
num_convs: 3
|
| 34 |
+
num_heads: 32
|
| 35 |
+
use_pre_layernorm: true
|
| 36 |
+
cos_phi_mode: linear
|
| 37 |
+
energy_mode: direct-elemtype-split
|
| 38 |
+
eta_mode: linear
|
| 39 |
+
gnn_lsh:
|
| 40 |
+
activation: elu
|
| 41 |
+
bin_size: 32
|
| 42 |
+
conv_type: gnn_lsh
|
| 43 |
+
distance_dim: 128
|
| 44 |
+
embedding_dim: 512
|
| 45 |
+
ffn_dist_hidden_dim: 128
|
| 46 |
+
ffn_dist_num_layers: 2
|
| 47 |
+
layernorm: true
|
| 48 |
+
max_num_bins: 200
|
| 49 |
+
num_convs: 8
|
| 50 |
+
num_node_messages: 2
|
| 51 |
+
width: 512
|
| 52 |
+
input_encoding: split
|
| 53 |
+
learned_representation_mode: last
|
| 54 |
+
mamba:
|
| 55 |
+
activation: elu
|
| 56 |
+
conv_type: mamba
|
| 57 |
+
d_conv: 4
|
| 58 |
+
d_state: 16
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
embedding_dim: 128
|
| 61 |
+
expand: 2
|
| 62 |
+
num_convs: 2
|
| 63 |
+
num_heads: 2
|
| 64 |
+
width: 128
|
| 65 |
+
pt_mode: direct-elemtype-split
|
| 66 |
+
sin_phi_mode: linear
|
| 67 |
+
trainable: all
|
| 68 |
+
ntest: 2000
|
| 69 |
+
ntrain: null
|
| 70 |
+
num_epochs: 10
|
| 71 |
+
num_workers: 12
|
| 72 |
+
nvalid: null
|
| 73 |
+
optimizer: adamw
|
| 74 |
+
patience: 20
|
| 75 |
+
prefetch_factor: 100
|
| 76 |
+
raytune:
|
| 77 |
+
asha:
|
| 78 |
+
brackets: 1
|
| 79 |
+
grace_period: 4
|
| 80 |
+
max_t: 200
|
| 81 |
+
reduction_factor: 4
|
| 82 |
+
default_metric: val_loss
|
| 83 |
+
default_mode: min
|
| 84 |
+
hyperband:
|
| 85 |
+
max_t: 200
|
| 86 |
+
reduction_factor: 4
|
| 87 |
+
hyperopt:
|
| 88 |
+
n_random_steps: 10
|
| 89 |
+
local_dir: /mnt/ceph/users/ewulff/ray_results
|
| 90 |
+
nevergrad:
|
| 91 |
+
n_random_steps: 10
|
| 92 |
+
sched: null
|
| 93 |
+
search_alg: null
|
| 94 |
+
save_attention: true
|
| 95 |
+
sort_data: false
|
| 96 |
+
test: null
|
| 97 |
+
test_dataset:
|
| 98 |
+
clic_edm_qq_pf:
|
| 99 |
+
splits:
|
| 100 |
+
- 1
|
| 101 |
+
- 2
|
| 102 |
+
- 3
|
| 103 |
+
- 4
|
| 104 |
+
- 5
|
| 105 |
+
- 6
|
| 106 |
+
- 7
|
| 107 |
+
- 8
|
| 108 |
+
- 9
|
| 109 |
+
- 10
|
| 110 |
+
version: 2.5.0
|
| 111 |
+
clic_edm_ttbar_pf:
|
| 112 |
+
splits:
|
| 113 |
+
- 1
|
| 114 |
+
- 2
|
| 115 |
+
- 3
|
| 116 |
+
- 4
|
| 117 |
+
- 5
|
| 118 |
+
- 6
|
| 119 |
+
- 7
|
| 120 |
+
- 8
|
| 121 |
+
- 9
|
| 122 |
+
- 10
|
| 123 |
+
version: 2.5.0
|
| 124 |
+
clic_edm_ww_fullhad_pf:
|
| 125 |
+
splits:
|
| 126 |
+
- 1
|
| 127 |
+
- 2
|
| 128 |
+
- 3
|
| 129 |
+
- 4
|
| 130 |
+
- 5
|
| 131 |
+
- 6
|
| 132 |
+
- 7
|
| 133 |
+
- 8
|
| 134 |
+
- 9
|
| 135 |
+
- 10
|
| 136 |
+
version: 2.5.0
|
| 137 |
+
train: true
|
| 138 |
+
train_dataset:
|
| 139 |
+
clic:
|
| 140 |
+
physical:
|
| 141 |
+
batch_size: 1
|
| 142 |
+
samples:
|
| 143 |
+
clic_edm_qq_pf:
|
| 144 |
+
splits:
|
| 145 |
+
- 1
|
| 146 |
+
- 2
|
| 147 |
+
- 3
|
| 148 |
+
- 4
|
| 149 |
+
- 5
|
| 150 |
+
- 6
|
| 151 |
+
- 7
|
| 152 |
+
- 8
|
| 153 |
+
- 9
|
| 154 |
+
- 10
|
| 155 |
+
version: 2.5.0
|
| 156 |
+
clic_edm_ttbar_pf:
|
| 157 |
+
splits:
|
| 158 |
+
- 1
|
| 159 |
+
- 2
|
| 160 |
+
- 3
|
| 161 |
+
- 4
|
| 162 |
+
- 5
|
| 163 |
+
- 6
|
| 164 |
+
- 7
|
| 165 |
+
- 8
|
| 166 |
+
- 9
|
| 167 |
+
- 10
|
| 168 |
+
version: 2.5.0
|
| 169 |
+
clic_edm_ww_fullhad_pf:
|
| 170 |
+
splits:
|
| 171 |
+
- 1
|
| 172 |
+
- 2
|
| 173 |
+
- 3
|
| 174 |
+
- 4
|
| 175 |
+
- 5
|
| 176 |
+
- 6
|
| 177 |
+
- 7
|
| 178 |
+
- 8
|
| 179 |
+
- 9
|
| 180 |
+
- 10
|
| 181 |
+
version: 2.5.0
|
| 182 |
+
val_freq: null
|
| 183 |
+
valid_dataset:
|
| 184 |
+
clic:
|
| 185 |
+
physical:
|
| 186 |
+
batch_size: 1
|
| 187 |
+
samples:
|
| 188 |
+
clic_edm_qq_pf:
|
| 189 |
+
splits:
|
| 190 |
+
- 1
|
| 191 |
+
- 2
|
| 192 |
+
- 3
|
| 193 |
+
- 4
|
| 194 |
+
- 5
|
| 195 |
+
- 6
|
| 196 |
+
- 7
|
| 197 |
+
- 8
|
| 198 |
+
- 9
|
| 199 |
+
- 10
|
| 200 |
+
version: 2.5.0
|
| 201 |
+
clic_edm_ttbar_pf:
|
| 202 |
+
splits:
|
| 203 |
+
- 1
|
| 204 |
+
- 2
|
| 205 |
+
- 3
|
| 206 |
+
- 4
|
| 207 |
+
- 5
|
| 208 |
+
- 6
|
| 209 |
+
- 7
|
| 210 |
+
- 8
|
| 211 |
+
- 9
|
| 212 |
+
- 10
|
| 213 |
+
version: 2.5.0
|
| 214 |
+
clic_edm_ww_fullhad_pf:
|
| 215 |
+
splits:
|
| 216 |
+
- 1
|
| 217 |
+
- 2
|
| 218 |
+
- 3
|
| 219 |
+
- 4
|
| 220 |
+
- 5
|
| 221 |
+
- 6
|
| 222 |
+
- 7
|
| 223 |
+
- 8
|
| 224 |
+
- 9
|
| 225 |
+
- 10
|
| 226 |
+
version: 2.5.0
|
| 227 |
+
weight_decay: 0.03
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/runs/train/events.out.tfevents.1741957152.workergpu072.934288.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18f9ea316e72db389ce5a1a6bd076330c12189182a3d9de30699ccb472188ca8
|
| 3 |
+
size 52833
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/runs/valid/events.out.tfevents.1741957152.workergpu072.934288.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15eac2fffca0b2c3b3c51d82c685ff3d2e41d521b9e4bed220b30e5cac13f451
|
| 3 |
+
size 11978655
|
clic/clusters/v2.3.0/largebatch_clic_wd3eneg2_gpus4_lr4eneg4_epochs10_pyg-clic-v230_adamw_tunedweightdecay_20250314_085408_738888/train-config.yaml
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoint_freq: 1
|
| 2 |
+
comet: true
|
| 3 |
+
comet_name: particleflow-pt
|
| 4 |
+
comet_offline: false
|
| 5 |
+
comet_step_freq: 1000
|
| 6 |
+
conv_type: attention
|
| 7 |
+
data_dir: /mnt/ceph/users/ewulff/tensorflow_datasets/clic
|
| 8 |
+
dataset: clic
|
| 9 |
+
dtype: bfloat16
|
| 10 |
+
enabled_test_datasets:
|
| 11 |
+
- clic_edm_qq_pf
|
| 12 |
+
finetune: null
|
| 13 |
+
gpu_batch_multiplier: 256
|
| 14 |
+
gpus: 4
|
| 15 |
+
load: null
|
| 16 |
+
lr: 0.0004
|
| 17 |
+
lr_schedule: cosinedecay
|
| 18 |
+
lr_schedule_config:
|
| 19 |
+
onecycle:
|
| 20 |
+
pct_start: 0.3
|
| 21 |
+
make_plots: null
|
| 22 |
+
model:
|
| 23 |
+
attention:
|
| 24 |
+
activation: relu
|
| 25 |
+
attention_type: flash
|
| 26 |
+
conv_type: attention
|
| 27 |
+
dropout_conv_id_ff: 0.0
|
| 28 |
+
dropout_conv_id_mha: 0.0
|
| 29 |
+
dropout_conv_reg_ff: 0.0
|
| 30 |
+
dropout_conv_reg_mha: 0.0
|
| 31 |
+
dropout_ff: 0.0
|
| 32 |
+
head_dim: 32
|
| 33 |
+
num_convs: 3
|
| 34 |
+
num_heads: 32
|
| 35 |
+
use_pre_layernorm: true
|
| 36 |
+
cos_phi_mode: linear
|
| 37 |
+
energy_mode: direct-elemtype-split
|
| 38 |
+
eta_mode: linear
|
| 39 |
+
gnn_lsh:
|
| 40 |
+
activation: elu
|
| 41 |
+
bin_size: 32
|
| 42 |
+
conv_type: gnn_lsh
|
| 43 |
+
distance_dim: 128
|
| 44 |
+
embedding_dim: 512
|
| 45 |
+
ffn_dist_hidden_dim: 128
|
| 46 |
+
ffn_dist_num_layers: 2
|
| 47 |
+
layernorm: true
|
| 48 |
+
max_num_bins: 200
|
| 49 |
+
num_convs: 8
|
| 50 |
+
num_node_messages: 2
|
| 51 |
+
width: 512
|
| 52 |
+
input_encoding: split
|
| 53 |
+
learned_representation_mode: last
|
| 54 |
+
mamba:
|
| 55 |
+
activation: elu
|
| 56 |
+
conv_type: mamba
|
| 57 |
+
d_conv: 4
|
| 58 |
+
d_state: 16
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
embedding_dim: 128
|
| 61 |
+
expand: 2
|
| 62 |
+
num_convs: 2
|
| 63 |
+
num_heads: 2
|
| 64 |
+
width: 128
|
| 65 |
+
pt_mode: direct-elemtype-split
|
| 66 |
+
sin_phi_mode: linear
|
| 67 |
+
trainable: all
|
| 68 |
+
ntest: 2000
|
| 69 |
+
ntrain: null
|
| 70 |
+
num_epochs: 10
|
| 71 |
+
num_workers: 12
|
| 72 |
+
nvalid: null
|
| 73 |
+
optimizer: adamw
|
| 74 |
+
patience: 20
|
| 75 |
+
prefetch_factor: 100
|
| 76 |
+
raytune:
|
| 77 |
+
asha:
|
| 78 |
+
brackets: 1
|
| 79 |
+
grace_period: 4
|
| 80 |
+
max_t: 200
|
| 81 |
+
reduction_factor: 4
|
| 82 |
+
default_metric: val_loss
|
| 83 |
+
default_mode: min
|
| 84 |
+
hyperband:
|
| 85 |
+
max_t: 200
|
| 86 |
+
reduction_factor: 4
|
| 87 |
+
hyperopt:
|
| 88 |
+
n_random_steps: 10
|
| 89 |
+
local_dir: /mnt/ceph/users/ewulff/ray_results
|
| 90 |
+
nevergrad:
|
| 91 |
+
n_random_steps: 10
|
| 92 |
+
sched: null
|
| 93 |
+
search_alg: null
|
| 94 |
+
save_attention: true
|
| 95 |
+
sort_data: false
|
| 96 |
+
test: null
|
| 97 |
+
test_dataset:
|
| 98 |
+
clic_edm_qq_pf:
|
| 99 |
+
splits:
|
| 100 |
+
- 1
|
| 101 |
+
- 2
|
| 102 |
+
- 3
|
| 103 |
+
- 4
|
| 104 |
+
- 5
|
| 105 |
+
- 6
|
| 106 |
+
- 7
|
| 107 |
+
- 8
|
| 108 |
+
- 9
|
| 109 |
+
- 10
|
| 110 |
+
version: 2.5.0
|
| 111 |
+
clic_edm_ttbar_pf:
|
| 112 |
+
splits:
|
| 113 |
+
- 1
|
| 114 |
+
- 2
|
| 115 |
+
- 3
|
| 116 |
+
- 4
|
| 117 |
+
- 5
|
| 118 |
+
- 6
|
| 119 |
+
- 7
|
| 120 |
+
- 8
|
| 121 |
+
- 9
|
| 122 |
+
- 10
|
| 123 |
+
version: 2.5.0
|
| 124 |
+
clic_edm_ww_fullhad_pf:
|
| 125 |
+
splits:
|
| 126 |
+
- 1
|
| 127 |
+
- 2
|
| 128 |
+
- 3
|
| 129 |
+
- 4
|
| 130 |
+
- 5
|
| 131 |
+
- 6
|
| 132 |
+
- 7
|
| 133 |
+
- 8
|
| 134 |
+
- 9
|
| 135 |
+
- 10
|
| 136 |
+
version: 2.5.0
|
| 137 |
+
train: true
|
| 138 |
+
train_dataset:
|
| 139 |
+
clic:
|
| 140 |
+
physical:
|
| 141 |
+
batch_size: 1
|
| 142 |
+
samples:
|
| 143 |
+
clic_edm_qq_pf:
|
| 144 |
+
splits:
|
| 145 |
+
- 1
|
| 146 |
+
- 2
|
| 147 |
+
- 3
|
| 148 |
+
- 4
|
| 149 |
+
- 5
|
| 150 |
+
- 6
|
| 151 |
+
- 7
|
| 152 |
+
- 8
|
| 153 |
+
- 9
|
| 154 |
+
- 10
|
| 155 |
+
version: 2.5.0
|
| 156 |
+
clic_edm_ttbar_pf:
|
| 157 |
+
splits:
|
| 158 |
+
- 1
|
| 159 |
+
- 2
|
| 160 |
+
- 3
|
| 161 |
+
- 4
|
| 162 |
+
- 5
|
| 163 |
+
- 6
|
| 164 |
+
- 7
|
| 165 |
+
- 8
|
| 166 |
+
- 9
|
| 167 |
+
- 10
|
| 168 |
+
version: 2.5.0
|
| 169 |
+
clic_edm_ww_fullhad_pf:
|
| 170 |
+
splits:
|
| 171 |
+
- 1
|
| 172 |
+
- 2
|
| 173 |
+
- 3
|
| 174 |
+
- 4
|
| 175 |
+
- 5
|
| 176 |
+
- 6
|
| 177 |
+
- 7
|
| 178 |
+
- 8
|
| 179 |
+
- 9
|
| 180 |
+
- 10
|
| 181 |
+
version: 2.5.0
|
| 182 |
+
val_freq: null
|
| 183 |
+
valid_dataset:
|
| 184 |
+
clic:
|
| 185 |
+
physical:
|
| 186 |
+
batch_size: 1
|
| 187 |
+
samples:
|
| 188 |
+
clic_edm_qq_pf:
|
| 189 |
+
splits:
|
| 190 |
+
- 1
|
| 191 |
+
- 2
|
| 192 |
+
- 3
|
| 193 |
+
- 4
|
| 194 |
+
- 5
|
| 195 |
+
- 6
|
| 196 |
+
- 7
|
| 197 |
+
- 8
|
| 198 |
+
- 9
|
| 199 |
+
- 10
|
| 200 |
+
version: 2.5.0
|
| 201 |
+
clic_edm_ttbar_pf:
|
| 202 |
+
splits:
|
| 203 |
+
- 1
|
| 204 |
+
- 2
|
| 205 |
+
- 3
|
| 206 |
+
- 4
|
| 207 |
+
- 5
|
| 208 |
+
- 6
|
| 209 |
+
- 7
|
| 210 |
+
- 8
|
| 211 |
+
- 9
|
| 212 |
+
- 10
|
| 213 |
+
version: 2.5.0
|
| 214 |
+
clic_edm_ww_fullhad_pf:
|
| 215 |
+
splits:
|
| 216 |
+
- 1
|
| 217 |
+
- 2
|
| 218 |
+
- 3
|
| 219 |
+
- 4
|
| 220 |
+
- 5
|
| 221 |
+
- 6
|
| 222 |
+
- 7
|
| 223 |
+
- 8
|
| 224 |
+
- 9
|
| 225 |
+
- 10
|
| 226 |
+
version: 2.5.0
|
| 227 |
+
weight_decay: 0.03
|