# Uploaded by: maximuspowers
# Commit b027849 (verified):
#   Upload weight-space autoencoder (encoder + decoder) and configuration
# Model architecture for the weight-space autoencoder (encoder + decoder).
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file using the alphabetical key order. The placement of `pooling`,
# `positional_encoding` (under `transformer`) and `type` (under
# `architecture`) is an assumption -- TODO confirm against the config loader.
architecture:
  latent_dim: 128
  transformer:
    # Decoder and encoder stacks currently use identical hyperparameters.
    decoder:
      activation: relu
      d_model: 512
      dim_feedforward: 2048
      dropout: 0.05
      norm_first: true
      num_heads: 8
      num_layers: 6
    encoder:
      activation: relu
      d_model: 512
      dim_feedforward: 2048
      dropout: 0.05
      norm_first: true
      num_heads: 8
      num_layers: 6
    pooling: mean
    positional_encoding: learned
  # Presumably selects which architecture sub-section is used -- confirm.
  type: transformer
# Data-loading options.
dataloader:
  num_workers: 0
  pin_memory: true
# Dataset source, size limits, and train/val/test partitioning.
# NOTE(review): nesting reconstructed from the alphabetical key order of a
# flattened copy; `random_seed` and the `*_split` keys are assumed to sit
# directly under `dataset` -- TODO confirm against the config loader.
dataset:
  hf_dataset: maximuspowers/muat-fourier-5
  input_mode: signature
  max_dimensions:
    max_hidden_layers: 6
    max_neurons_per_layer: 8
    max_sequence_length: 5
  neuron_profile:
    features_per_neuron: 5
    methods:
      - fourier
  random_seed: 42
  # The three split fractions sum to 1.0.
  test_split: 0.1
  train_split: 0.8
  val_split: 0.1
# Compute device selection.
device:
  type: auto
# Evaluation settings: the list of metrics to report and a per-layer toggle.
evaluation:
  metrics:
    - mse
    - mae
    - rmse
    - cosine_similarity
    - relative_error
    - r2_score
  per_layer_metrics: false
# Model-hub upload settings for this run.
hub:
  enabled: true
  private: false
  push_logs: true
  push_metrics: true
  push_model: true
  repo_id: maximuspowers/sig-autoencoder-fourier-5-supervised-mse
  # NOTE(review): the value is redacted in this copy. Do not commit a real
  # access token to a public (`private: false`) repository; prefer supplying
  # it via an environment variable or secret store.
  token: <REDACTED>
# Logging, checkpointing, and TensorBoard settings.
# NOTE(review): nesting reconstructed from the alphabetical key order of a
# flattened copy; `verbose` is assumed to sit directly under `logging`
# rather than under `tensorboard` -- TODO confirm against the config loader.
logging:
  checkpoint:
    enabled: true
    # Checkpoints track `val_loss`, with `min` as the comparison mode.
    mode: min
    monitor: val_loss
    save_best_only: true
  tensorboard:
    auto_launch: true
    enabled: true
    log_interval: 10
    port: 6006
  verbose: true
# Loss configuration: a contrastive-type loss with an MSE reconstruction term.
# NOTE(review): nesting reconstructed from the alphabetical key order of a
# flattened copy -- TODO confirm against the config loader.
loss:
  covariance_weight: 0.04
  gamma: 0.2
  # Schedule is disabled, so presumably the fixed `gamma` above applies.
  gamma_schedule:
    decay_type: linear
    enabled: false
    final: 0.2
    initial: 0.1
    warmup_epochs: 50
  projection_head:
    hidden_dim: 256
    # Same value as `latent_dim` (128) in the architecture section;
    # presumably these must match -- confirm.
    input_dim: 128
    output_dim: 32
  projection_head_lr: 0.001
  reconstruction_type: mse
  temperature: 0.08
  # Schedule is enabled: linear from 0.3 to 0.08 over 30 epochs.
  temperature_schedule:
    decay_type: linear
    enabled: true
    final: 0.08
    initial: 0.3
    warmup_epochs: 30
  type: contrastive
  variance_weight: 1.0
# Output directory recorded for this run. This is an absolute path on the
# machine that produced the run and will not exist elsewhere.
run_dir: '/Users/max/Desktop/muat/model_zoo/runs/train-encoder-decoder_config_2025-12-16_12-50-47'
# Presumably controls whether run logs are removed afterwards -- confirm.
run_log_cleanup: false
# Tokenization settings for the model input.
tokenization:
  chunk_size: 1
  granularity: neuron
  include_metadata: true
  max_tokens: 64
# Optimisation and training-loop settings.
# NOTE(review): nesting reconstructed from the alphabetical key order of a
# flattened copy -- TODO confirm against the config loader.
training:
  batch_size: 32
  early_stopping:
    enabled: true
    mode: min
    monitor: val_loss
    patience: 15
  ema_decay: 0.999
  epochs: 250
  gradient_accumulation_steps: 4
  learning_rate: 0.0001
  lr_scheduler:
    enabled: true
    # NOTE(review): `factor` and `patience` look like plateau-style options;
    # verify whether the `cosine_warmup` scheduler type reads them.
    factor: 0.5
    min_lr: 1.0e-06
    patience: 3
    type: cosine_warmup
    warmup_epochs: 15
  max_grad_norm: 1.0
  optimizer: adamw
  use_ema: true
  weight_decay: 0.0001