# Add this to your config for sparse attention every other layer
{
"attention_config": [[["local", "global"], "all"]],
# sparsity config:
# (these are the defaults for local sliding-window sparsity; training will work without this
# section, but it is included here for illustrative purposes)
# see https://www.deepspeed.ai/tutorials/sparse-attention/#how-to-config-sparsity-structures for
# more detailed config instructions and available parameters
"sparsity_config": {
"block": 16, # block size
"num_local_blocks": 32
}
}
|