allknowingroger committed on
Commit
1d994ee
1 Parent(s): 2ac9fc1

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - moe
5
+ - frankenmoe
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - lex-hue/Delexa-7b
10
+ - allknowingroger/Neurallaymons-7B-slerp
11
+ base_model:
12
+ - lex-hue/Delexa-7b
13
+ - allknowingroger/Neurallaymons-7B-slerp
14
+ ---
15
+
16
+ # DelexaMaths-12B-MoE
17
+
18
+ DelexaMaths-12B-MoE is a Mixture of Experts (MoE) made with the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
19
+ * [lex-hue/Delexa-7b](https://huggingface.co/lex-hue/Delexa-7b)
20
+ * [allknowingroger/Neurallaymons-7B-slerp](https://huggingface.co/allknowingroger/Neurallaymons-7B-slerp)
21
+
22
+ ## 🧩 Configuration
23
+
24
+ ```yaml
25
+ base_model: lex-hue/Delexa-7b
26
+ experts:
27
+   - source_model: lex-hue/Delexa-7b
28
+     positive_prompts: ["what"]
29
+   - source_model: allknowingroger/Neurallaymons-7B-slerp
30
+     positive_prompts: ["math"]
31
+ ```
32
+
33
+ ## 💻 Usage
34
+
35
+ ```python
36
+ !pip install -qU transformers bitsandbytes accelerate
37
+
38
+ from transformers import AutoTokenizer
39
+ import transformers
40
+ import torch
41
+
42
+ model = "allknowingroger/DelexaMaths-12B-MoE"
43
+
44
+ tokenizer = AutoTokenizer.from_pretrained(model)
45
+ pipeline = transformers.pipeline(
46
+ "text-generation",
47
+ model=model,
48
+ model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
49
+ )
50
+
51
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
52
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
53
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
54
+ print(outputs[0]["generated_text"])
55
+ ```
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "lex-hue/Delexa-7b",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "lex-hue/Delexa-7b--configuration_mistral.MistralConfig",
9
+ "AutoModelForCausalLM": "lex-hue/Delexa-7b--modeling_mistral_yarn.MistralForCausalLM"
10
+ },
11
+ "bos_token_id": 1,
12
+ "eos_token_id": 2,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 4096,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 14336,
17
+ "max_position_embeddings": 32768,
18
+ "max_sequence_length": 131072,
19
+ "model_type": "mixtral",
20
+ "num_attention_heads": 32,
21
+ "num_experts_per_tok": 2,
22
+ "num_hidden_layers": 32,
23
+ "num_key_value_heads": 8,
24
+ "num_local_experts": 2,
25
+ "output_router_logits": false,
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_scaling": {
28
+ "factor": 16.0,
29
+ "finetuned": true,
30
+ "original_max_position_embeddings": 8192,
31
+ "type": "yarn"
32
+ },
33
+ "rope_theta": 10000.0,
34
+ "router_aux_loss_coef": 0.001,
35
+ "sliding_window": null,
36
+ "tie_word_embeddings": false,
37
+ "torch_dtype": "float16",
38
+ "transformers_version": "4.39.3",
39
+ "use_cache": true,
40
+ "vocab_size": 32000
41
+ }
model-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ed3e1abc223db072ad226b02aaaf70e38b8f8f48234933491aefc3ceb4a10f
3
+ size 1933849864
model-10.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e001c599b4d9b1a7f51fae5e7cd21ca3a539d1e9ce7edf1c13048eff71399c5
3
+ size 1996490952
model-11.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d609b4bebed499eb5b59849474ab6a72458ea499ae5ca15136fc1dbc985b35
3
+ size 1996490952
model-12.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331c11642cb1b59144c7f2e622ab44b9253edfc830889d363a1870cf22d0f764
3
+ size 1979981504
model-2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:901bde884ad4df4ce87d72900b856d96cc4d1e3ac16aa064da4ba7abf3e0e485
3
+ size 1996490944
model-3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5274a73f22da9b154deb8cf3f0bce349b4c1720d787280311dcd94ff5cb25f53
3
+ size 1996490952
model-4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7de73fb51878d99af9e560ccc36392d40bf8052114a55ddc7a9d48d18d940d21
3
+ size 1996490952
model-5.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c38be3d7a410510d190ed45f9b5f54b322a11eb27722a44b3a2eb96ce930a952
3
+ size 1996490936
model-6.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66bec08c111a50de1cd41fb5d1268ff4bc463453061742fb06a09dca429220df
3
+ size 1996490944
model-7.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e058db1e56b36870d6b9427654ffca1e69b17d1753a6c9c72671b80d174cf3ed
3
+ size 1996490952
model-8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f0771061e6dbe07a9b7c891b7b817b0d02c1925819038e1196184b3277c6d0c
3
+ size 1996490944
model-9.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d64cb10858a71ae3b98f7b204a9205a028b57830c3af3170002bebfa4534818b
3
+ size 1996490936