# Mixture-of-experts merge configuration.
# NOTE(review): the keys look like the mergekit-moe config schema — confirm
# against the tool that actually consumes this file.

# Checkpoint the merged model is built on.
base_model: Qwen/Qwen2-7B

# NOTE(review): presumably "random" means the router gates are randomly
# initialised rather than derived from prompts — confirm in the tool's docs.
gate_mode: random

architecture: qwen
dtype: bfloat16

# Routed experts. Both entries reference the same checkpoint as base_model.
# positive_prompts is intentionally left empty for every expert.
experts:
  - source_model: Qwen/Qwen2-7B
    positive_prompts: []
  - source_model: Qwen/Qwen2-7B
    positive_prompts: []

# Shared expert list (single entry, same checkpoint as base_model).
shared_experts:
  - source_model: Qwen/Qwen2-7B
    positive_prompts: []
    # NOTE(review): the original indentation was lost; residual_scale is
    # nested under this shared expert because it directly followed the
    # entry's keys — confirm it is not meant to be a top-level option.
    residual_scale: 0.1