---
# Training configuration for sparse-autoencoder (SAE) training on a
# TransformerLens model.
#
# NOTE(review): this file was restored to block style from a copy whose line
# breaks had been lost. Nesting was inferred from the alphabetical key order
# of the dump; verify it against the consuming config schema.

# --- Batching, logging and evaluation cadence --------------------------------
act_frequency_n_tokens: 500000
# NOTE(review): effective_batch_size is 2x batch_size, presumably reached via
# gradient accumulation; confirm against the training loop.
batch_size: 8
collect_act_frequency_every_n_samples: 40000
collect_output_metrics_every_n_samples: 0
cooldown_samples: 0
effective_batch_size: 16

# --- Evaluation data ----------------------------------------------------------
# NOTE(review): identical to train_data (same dataset, split and seed), so
# evaluation samples may overlap with training samples; confirm this is
# intended.
eval_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: 0
  split: train
  streaming: true
  tokenizer_name: gpt2
eval_every_n_samples: 40000
eval_n_samples: 500
log_every_n_grad_steps: 20

# --- Loss terms ---------------------------------------------------------------
# Only out_to_in and sparsity carry coefficients here; the other terms are
# null.
loss:
  in_to_orig: null
  logits_kl: null
  out_to_in:
    coeff: 1.0
  out_to_orig: null
  sparsity:
    coeff: 4.0
    p_norm: 1.0

# --- Optimisation -------------------------------------------------------------
lr: 0.0005
lr_schedule: cosine
max_grad_norm: 10.0
min_lr_factor: 0.1
n_samples: 400000

# --- SAE architecture and placement -------------------------------------------
saes:
  dict_size_to_input_ratio: 60.0
  k: null
  pretrained_sae_paths: null
  retrain_saes: false
  sae_positions:
    - blocks.2.hook_resid_pre
  type_of_sparsifier: sae

# --- Output -------------------------------------------------------------------
# NOTE(review): absolute, machine-specific path; it will need overriding on
# any other host.
save_dir: /mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
save_every_n_samples: null
seed: 0

# --- Base model ---------------------------------------------------------------
tlens_model_name: gpt2-small
tlens_model_path: null

# --- Training data ------------------------------------------------------------
train_data:
  column_name: input_ids
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  seed: 0
  split: train
  streaming: true
  tokenizer_name: gpt2

# --- Experiment tracking ------------------------------------------------------
wandb_project: gpt2-layerwise_play
wandb_run_name: null
wandb_run_name_prefix: ''
warmup_samples: 20000