Connor
committed on
Commit
•
e6c769b
1
Parent(s):
29e23ac
put all attn saes in one repo
Browse files
- gpt2-small_L0_Hcat_z_lr1.20e-03_l11.80e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L0_Hcat_z_lr1.20e-03_l11.80e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L10_Hcat_z_lr1.20e-03_l11.30e+00_ds24576_bs4096_dc1.00e-05_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L10_Hcat_z_lr1.20e-03_l11.30e+00_ds24576_bs4096_dc1.00e-05_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L11_Hcat_z_lr1.20e-03_l13.00e+00_ds24576_bs4096_dc3.16e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L11_Hcat_z_lr1.20e-03_l13.00e+00_ds24576_bs4096_dc3.16e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L1_Hcat_z_lr1.20e-03_l18.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v5.pt +3 -0
- gpt2-small_L1_Hcat_z_lr1.20e-03_l18.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v5_cfg.json +1 -0
- gpt2-small_L2_Hcat_z_lr1.20e-03_l11.00e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v4.pt +3 -0
- gpt2-small_L2_Hcat_z_lr1.20e-03_l11.00e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v4_cfg.json +1 -0
- gpt2-small_L3_Hcat_z_lr1.20e-03_l19.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L3_Hcat_z_lr1.20e-03_l19.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L4_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v7.pt +3 -0
- gpt2-small_L4_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v7_cfg.json +1 -0
- gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L6_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L6_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L7_Hcat_z_lr1.20e-03_l11.10e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L7_Hcat_z_lr1.20e-03_l11.10e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
- gpt2-small_L8_Hcat_z_lr1.20e-03_l11.50e+00_ds24576_bs4096_dc1.00e-05_rsanthropic_rie25000_nr4_v6_cfg.json +1 -0
- gpt2-small_L9_Hcat_z_lr1.20e-03_l11.20e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt +3 -0
- gpt2-small_L9_Hcat_z_lr1.20e-03_l11.20e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json +1 -0
gpt2-small_L0_Hcat_z_lr1.20e-03_l11.80e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b822b5e489757eb94e18b0ba4bdb9ca9ff04eef25ff9c9cd824026b5d5edf38f
|
3 |
+
size 151099672
|
gpt2-small_L0_Hcat_z_lr1.20e-03_l11.80e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.8, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 0, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L0-20240117", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_0_24576_z"}
|
gpt2-small_L10_Hcat_z_lr1.20e-03_l11.30e+00_ds24576_bs4096_dc1.00e-05_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:887e17c99430f925b36a645819d1275e58ccd534b9612f5c6614bb215840a8a9
|
3 |
+
size 151099680
|
gpt2-small_L10_Hcat_z_lr1.20e-03_l11.30e+00_ds24576_bs4096_dc1.00e-05_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.3, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 10, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-05, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-test-L10-20240118", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.10.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_10_24576_z"}
|
gpt2-small_L11_Hcat_z_lr1.20e-03_l13.00e+00_ds24576_bs4096_dc3.16e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aae765f1116d42a1a319af1db93cb2f6e134ec292bb0eccd43bd4021834fefe
|
3 |
+
size 151099680
|
gpt2-small_L11_Hcat_z_lr1.20e-03_l13.00e+00_ds24576_bs4096_dc3.16e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 3.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 11, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 3.162277660168379e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L11-20240117", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.11.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_11_24576_z"}
|
gpt2-small_L1_Hcat_z_lr1.20e-03_l18.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v5.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48f07690fa645b6fb8db5cbc7b9a981847ac132d1684d9b82bd15d3befe205fc
|
3 |
+
size 151099672
|
gpt2-small_L1_Hcat_z_lr1.20e-03_l18.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v5_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 0.8, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-test-L1-20240117", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_1_24576_z"}
|
gpt2-small_L2_Hcat_z_lr1.20e-03_l11.00e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v4.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f3541a135a32f2c69de8e7bd5d5fd184b3696c4bf8421b51e855bb2e4630e5e
|
3 |
+
size 151099672
|
gpt2-small_L2_Hcat_z_lr1.20e-03_l11.00e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v4_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 1000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 2, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-test-L2-20240117", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.2.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_2_24576_z"}
|
gpt2-small_L3_Hcat_z_lr1.20e-03_l19.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fd318e6fe6b33bd7fd0ece3e4d06df945ad3217ff84aa707f5dddedb44b4bcf
|
3 |
+
size 151099672
|
gpt2-small_L3_Hcat_z_lr1.20e-03_l19.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 0.9, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 3, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L3-20240118", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.3.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_3_24576_z"}
|
gpt2-small_L4_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v7.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74f68e3d0afafef447e83c0aaa77085c83b18f562c407aa76265fc24dc3c3dcf
|
3 |
+
size 151099672
|
gpt2-small_L4_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v7_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.1, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 4, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L4-20240118", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.4.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_4_24576_z"}
|
gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f25a65b6b97de636f1937cb8148ebbe0922cdccc0af9b1911248233232487bc
|
3 |
+
size 151099672
|
gpt2-small_L5_Hcat_z_lr1.20e-03_l16.00e-01_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 0.6, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 5, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-test-L5-20240112", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.5.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_5_24576_z"}
|
gpt2-small_L6_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33aad2927d141bcb4aa1306648dd497f125bb00af0ca9c15fb54ad7ca27a4a26
|
3 |
+
size 151099672
|
gpt2-small_L6_Hcat_z_lr1.20e-03_l11.10e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.1, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 6, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L6-20240118", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.6.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_6_24576_z"}
|
gpt2-small_L7_Hcat_z_lr1.20e-03_l11.10e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6829f8d529c4444a3d3d9b72418181811282215705be4b5010b41a7edb54770
|
3 |
+
size 302192920
|
gpt2-small_L7_Hcat_z_lr1.20e-03_l11.10e+00_ds49152_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.1, "beta1": 0.9, "beta2": 0.99, "dict_mult": 64, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 7, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L7-20240117", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.7.attn.hook_z", "act_size": 768, "dict_size": 49152, "name": "gpt2-small_7_49152_z"}
|
gpt2-small_L8_Hcat_z_lr1.20e-03_l11.50e+00_ds24576_bs4096_dc1.00e-05_rsanthropic_rie25000_nr4_v6_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.5, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 8, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-05, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-test-L8-20240118", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.8.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_8_24576_z"}
|
gpt2-small_L9_Hcat_z_lr1.20e-03_l11.20e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aecf46e40ea4db5fcc5247bf0da81354192b697a1614a3aff9ee978cf1e2d7f
|
3 |
+
size 151099672
|
gpt2-small_L9_Hcat_z_lr1.20e-03_l11.20e+00_ds24576_bs4096_dc1.00e-06_rsanthropic_rie25000_nr4_v9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0012, "num_tokens": 2000000000, "l1_coeff": 1.2, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gpt2-small", "site": "z", "layer": 9, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-06, "re_init_every": 25000, "anthropic_resample_last": 12500, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gpt2-L9-20240117", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "b_dec_init": "zeros", "sched_type": "cosine_warmup", "sched_epochs": 1000, "sched_lr_factor": 0.1, "sched_warmup_epochs": 1000, "sched_finish": true, "anthropic_resample_batches": 100, "eval_every": 1000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.9.attn.hook_z", "act_size": 768, "dict_size": 24576, "name": "gpt2-small_9_24576_z"}
|