maximuspowers committed on
Commit
a6b5967
·
verified ·
1 Parent(s): aafc363

Upload weight-space autoencoder (encoder + decoder) and configuration

Browse files
Files changed (5) hide show
  1. README.md +7 -8
  2. config.yaml +20 -15
  3. decoder.pt +2 -2
  4. encoder.pt +2 -2
  5. tokenizer_config.json +1 -3
README.md CHANGED
@@ -18,13 +18,12 @@ It includes both an encoder (compresses weights into latent representations) and
18
  - **Architecture**: Transformer encoder-decoder
19
  - **Training Dataset**: maximuspowers/muat-fourier-5
20
  - **Input Mode**: signature
21
- - **Latent Dimension**: 128
22
 
23
  ## Tokenization
24
 
25
- - **Chunk Size**: 1 weight values per token
26
  - **Max Tokens**: 64
27
- - **Metadata**: True
28
 
29
  ## Training Config
30
 
@@ -35,8 +34,8 @@ It includes both an encoder (compresses weights into latent representations) and
35
 
36
  ## Performance Metrics (Test Set)
37
 
38
- - **MSE**: 0.105820
39
- - **MAE**: 0.208260
40
- - **RMSE**: 0.325300
41
- - **Cosine Similarity**: 0.9560
42
- - **R² Score**: 0.9830
 
18
  - **Architecture**: Transformer encoder-decoder
19
  - **Training Dataset**: maximuspowers/muat-fourier-5
20
  - **Input Mode**: signature
21
+ - **Latent Dimension**: 256
22
 
23
  ## Tokenization
24
 
25
+ - **Granularity**: neuron
26
  - **Max Tokens**: 64
 
27
 
28
  ## Training Config
29
 
 
34
 
35
  ## Performance Metrics (Test Set)
36
 
37
+ - **MSE**: 0.125011
38
+ - **MAE**: 0.259796
39
+ - **RMSE**: 0.353570
40
+ - **Cosine Similarity**: 0.0348
41
+ - **R² Score**: -0.0097
config.yaml CHANGED
@@ -1,22 +1,27 @@
1
  architecture:
2
- latent_dim: 128
 
 
 
 
3
  transformer:
4
  decoder:
5
  activation: gelu
6
  d_model: 512
7
  dim_feedforward: 2048
8
- dropout: 0.1
9
  num_heads: 8
10
  num_layers: 6
11
  encoder:
12
  activation: gelu
13
  d_model: 512
14
  dim_feedforward: 2048
15
- dropout: 0.1
16
  num_heads: 8
17
- num_layers: 6
18
  pooling: mean
19
  positional_encoding: learned
 
20
  type: transformer
21
  dataloader:
22
  num_workers: 0
@@ -75,26 +80,26 @@ loss:
75
  contrastive:
76
  enabled: true
77
  projection_head:
78
- hidden_dim: 64
79
- input_dim: 128
80
- output_dim: 32
81
  temperature: 0.1
82
- weight: 0.4
83
  functional:
84
- benchmark_path: /configs/autoencoder/benchmark_dataset.json
85
  enabled: true
86
  test_samples: null
87
- weight: 0.4
88
  reconstruction:
89
  enabled: true
90
  type: mse
91
- weight: 0.2
92
- run_dir: /Users/max/Desktop/muat/model_zoo/runs/train-encoder-decoder_config_2025-12-17_19-33-32
93
  run_log_cleanup: false
94
  tokenization:
95
  chunk_size: 1
96
  granularity: neuron
97
- include_metadata: true
98
  max_tokens: 64
99
  training:
100
  batch_size: 32
@@ -102,8 +107,8 @@ training:
102
  enabled: true
103
  mode: min
104
  monitor: val_loss
105
- patience: 15
106
- epochs: 250
107
  gradient_accumulation_steps: 4
108
  learning_rate: 0.0001
109
  lr_scheduler:
 
1
  architecture:
2
+ arch_encoder:
3
+ embed_dim: 64
4
+ max_layers: 6
5
+ max_neurons: 8
6
+ latent_dim: 256
7
  transformer:
8
  decoder:
9
  activation: gelu
10
  d_model: 512
11
  dim_feedforward: 2048
12
+ dropout: 0.025
13
  num_heads: 8
14
  num_layers: 6
15
  encoder:
16
  activation: gelu
17
  d_model: 512
18
  dim_feedforward: 2048
19
+ dropout: 0.025
20
  num_heads: 8
21
+ num_layers: 10
22
  pooling: mean
23
  positional_encoding: learned
24
+ use_positional_encoding: true
25
  type: transformer
26
  dataloader:
27
  num_workers: 0
 
80
  contrastive:
81
  enabled: true
82
  projection_head:
83
+ hidden_dim: 128
84
+ input_dim: 256
85
+ output_dim: 64
86
  temperature: 0.1
87
+ weight: 0.05
88
  functional:
89
+ benchmark_path: /Users/max/Desktop/muat/model_zoo/configs/autoencoder/benchmark_dataset.json
90
  enabled: true
91
  test_samples: null
92
+ weight: 0.5
93
  reconstruction:
94
  enabled: true
95
  type: mse
96
+ weight: 0.6
97
+ run_dir: /Users/max/Desktop/muat/model_zoo/runs/train-encoder-decoder_config_2025-12-18_22-23-46
98
  run_log_cleanup: false
99
  tokenization:
100
  chunk_size: 1
101
  granularity: neuron
102
+ include_metadata: false
103
  max_tokens: 64
104
  training:
105
  batch_size: 32
 
107
  enabled: true
108
  mode: min
109
  monitor: val_loss
110
+ patience: 50
111
+ epochs: 1000
112
  gradient_accumulation_steps: 4
113
  learning_rate: 0.0001
114
  lr_scheduler:
decoder.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a7e1b2bed452a4562d4f0e6fb7e47a75e917bfbf6a68f660bdfc3194fabfdca
3
- size 101365774
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c8bdb28e21e756364067eaee646097f7e24a78aa907a48d4c34a08c1c7df45f
3
+ size 103255652
encoder.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ebcb2592d5bb6ef3f7806da61037cc769ad5f29534c6dbdb683228624a2db38
3
- size 76106790
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394e0021fc9d3862b3307c018dbce6118ea0c2b6664766d1729081a3904ca1d8
3
+ size 126816306
tokenizer_config.json CHANGED
@@ -1,9 +1,7 @@
1
  {
2
  "chunk_size": 1,
3
  "max_tokens": 64,
4
- "include_metadata": true,
5
- "metadata_features": 5,
6
- "token_dim": 14,
7
  "granularity": "neuron",
8
  "max_neuron_data_size": 9
9
  }
 
1
  {
2
  "chunk_size": 1,
3
  "max_tokens": 64,
4
+ "token_dim": 9,
 
 
5
  "granularity": "neuron",
6
  "max_neuron_data_size": 9
7
  }