# Add this to your config to use sparse attention on every other layer
{
  # Attention types assigned to the model's layers: the ["local", "global"]
  # pattern is applied to "all" layers, so the sparse ("local") attention
  # presumably lands on every other layer -- verify against the config docs.
  "attention_config": [[["local", "global"], "all"]],

  # Sparsity config.
  # These are the defaults for local sliding-window sparsity; training works
  # without this section, but it is left in for illustrative purposes.
  # See https://www.deepspeed.ai/tutorials/sparse-attention/#how-to-config-sparsity-structures
  # for more detailed config instructions and available parameters.
  "sparsity_config": {
    "block": 16,  # block size
    # NOTE(review): presumably the width of the local attention window, counted
    # in blocks -- confirm against the DeepSpeed documentation linked above.
    "num_local_blocks": 32
  }
}