aim-base / config.json
hp-l33's picture
Push model using huggingface_hub.
a07be7e verified
raw
history blame contribute delete
447 Bytes
{
"adaln_group": false,
"attn_cfg": {},
"attn_layer_idx": [],
"d_intermediate": 0,
"d_model": 768,
"fused_add_norm": true,
"mixer_drop": 0.0,
"mlp_drop": 0.0,
"n_layer": 24,
"num_classes": 1000,
"num_groups": 1,
"num_tokens": 256,
"pad_vocab_size_multiple": 8,
"residual_in_fp32": true,
"rms_norm": true,
"ssm_cfg": {
"layer": "Mamba2"
},
"tie_embeddings": true,
"token_drop": 0.0,
"vocab_size": 16384
}