{ "adaln_group": false, "attn_cfg": {}, "attn_layer_idx": [], "d_intermediate": 0, "d_model": 768, "fused_add_norm": true, "mixer_drop": 0.0, "mlp_drop": 0.0, "n_layer": 24, "num_classes": 1000, "num_groups": 1, "num_tokens": 256, "pad_vocab_size_multiple": 8, "residual_in_fp32": true, "rms_norm": true, "ssm_cfg": { "layer": "Mamba2" }, "tie_embeddings": true, "token_drop": 0.0, "vocab_size": 16384 }