---
tags:
- merge
- mergekit
- lazymergekit
- Locutusque/Hercules-3.1-Mistral-7B
- cognitivecomputations/dolphin-2.8-experiment26-7b
base_model:
- Locutusque/Hercules-3.1-Mistral-7B
- cognitivecomputations/dolphin-2.8-experiment26-7b
license: apache-2.0
---

# JOSIE_Beta-3-7B-slerp

JOSIE_Beta-3-7B-slerp is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):

* [Locutusque/Hercules-3.1-Mistral-7B](https://huggingface.co/Locutusque/Hercules-3.1-Mistral-7B)
* [cognitivecomputations/dolphin-2.8-experiment26-7b](https://huggingface.co/cognitivecomputations/dolphin-2.8-experiment26-7b)

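The merge method is SLERP (spherical linear interpolation): corresponding weight tensors of the two parents are interpolated along the arc between them rather than along a straight line, with the interpolation factor `t` varied per layer and per module type (see the configuration below). Here is a minimal sketch of the operation on a single pair of tensors, assuming PyTorch; it illustrates the idea only and is not mergekit's exact implementation:

```python
import torch

def slerp(t: float, w0: torch.Tensor, w1: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """Spherically interpolate between two weight tensors (illustrative only)."""
    v0 = w0.flatten().float()
    v1 = w1.flatten().float()
    # Angle between the two tensors, treated as points on a sphere.
    cos_omega = torch.dot(v0, v1) / (v0.norm() * v1.norm() + eps)
    omega = torch.acos(cos_omega.clamp(-1.0, 1.0))
    if omega.abs() < eps:
        # Nearly parallel tensors: fall back to plain linear interpolation.
        return (1 - t) * w0 + t * w1
    sin_omega = torch.sin(omega)
    merged = (torch.sin((1 - t) * omega) / sin_omega) * v0 + (torch.sin(t * omega) / sin_omega) * v1
    return merged.reshape(w0.shape).to(w0.dtype)

# t = 0 keeps the first parent's tensor, t = 1 keeps the second's, 0.5 lies halfway along the arc.
a, b = torch.randn(4096, 4096), torch.randn(4096, 4096)
halfway = slerp(0.5, a, b)
```
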
# IMPORTANT!!!

Judging by the evaluation benchmarks on the Open LLM Leaderboard, this is the best-performing model so far, but it is not uncensored, and its answers are not particularly good when chatting with it.

I will further train it on datasets such as Dolphin and others.

Open LLM Leaderboard evaluation results:

```json
{
    "all": {
        "acc": 0.6432209013684985,
        "acc_stderr": 0.03221665824377992,
        "acc_norm": 0.6450099678239628,
        "acc_norm_stderr": 0.032867717920871294,
        "mc1": 0.3353733170134639,
        "mc1_stderr": 0.01652753403966899,
        "mc2": 0.48804542326643174,
        "mc2_stderr": 0.015087630632446147
    },
    "harness|arc:challenge|25": {
        "acc": 0.6083617747440273,
        "acc_stderr": 0.014264122124938217,
        "acc_norm": 0.6339590443686007,
        "acc_norm_stderr": 0.014077223108470139
    },
    "harness|hellaswag|10": {
        "acc": 0.6618203545110536,
        "acc_stderr": 0.0047212316370927225,
        "acc_norm": 0.8456482772356104,
        "acc_norm_stderr": 0.0036054721167622867
    },
    "harness|hendrycksTest-abstract_algebra|5": {
        "acc": 0.3,
        "acc_stderr": 0.046056618647183814,
        "acc_norm": 0.3,
        "acc_norm_stderr": 0.046056618647183814
    },
    "harness|hendrycksTest-anatomy|5": {
        "acc": 0.6074074074074074,
        "acc_stderr": 0.04218506215368879,
        "acc_norm": 0.6074074074074074,
        "acc_norm_stderr": 0.04218506215368879
    },
    "harness|hendrycksTest-astronomy|5": {
        "acc": 0.6710526315789473,
        "acc_stderr": 0.03823428969926605,
        "acc_norm": 0.6710526315789473,
        "acc_norm_stderr": 0.03823428969926605
    },
    "harness|hendrycksTest-business_ethics|5": {
        "acc": 0.61,
        "acc_stderr": 0.04902071300001975,
        "acc_norm": 0.61,
        "acc_norm_stderr": 0.04902071300001975
    },
    "harness|hendrycksTest-clinical_knowledge|5": {
        "acc": 0.7018867924528301,
        "acc_stderr": 0.02815283794249387,
        "acc_norm": 0.7018867924528301,
        "acc_norm_stderr": 0.02815283794249387
    },
    "harness|hendrycksTest-college_biology|5": {
        "acc": 0.7638888888888888,
        "acc_stderr": 0.03551446610810826,
        "acc_norm": 0.7638888888888888,
        "acc_norm_stderr": 0.03551446610810826
    },
    "harness|hendrycksTest-college_chemistry|5": {
        "acc": 0.44,
        "acc_stderr": 0.04988876515698589,
        "acc_norm": 0.44,
        "acc_norm_stderr": 0.04988876515698589
    },
    "harness|hendrycksTest-college_computer_science|5": {
        "acc": 0.52,
        "acc_stderr": 0.050211673156867795,
        "acc_norm": 0.52,
        "acc_norm_stderr": 0.050211673156867795
    },
    "harness|hendrycksTest-college_mathematics|5": {
        "acc": 0.37,
        "acc_stderr": 0.04852365870939099,
        "acc_norm": 0.37,
        "acc_norm_stderr": 0.04852365870939099
    },
    "harness|hendrycksTest-college_medicine|5": {
        "acc": 0.6473988439306358,
        "acc_stderr": 0.03643037168958548,
        "acc_norm": 0.6473988439306358,
        "acc_norm_stderr": 0.03643037168958548
    },
    "harness|hendrycksTest-college_physics|5": {
        "acc": 0.38235294117647056,
        "acc_stderr": 0.04835503696107223,
        "acc_norm": 0.38235294117647056,
        "acc_norm_stderr": 0.04835503696107223
    },
    "harness|hendrycksTest-computer_security|5": {
        "acc": 0.75,
        "acc_stderr": 0.04351941398892446,
        "acc_norm": 0.75,
        "acc_norm_stderr": 0.04351941398892446
    },
    "harness|hendrycksTest-conceptual_physics|5": {
        "acc": 0.574468085106383,
        "acc_stderr": 0.03232146916224468,
        "acc_norm": 0.574468085106383,
        "acc_norm_stderr": 0.03232146916224468
    },
    "harness|hendrycksTest-econometrics|5": {
        "acc": 0.4649122807017544,
        "acc_stderr": 0.046920083813689104,
        "acc_norm": 0.4649122807017544,
        "acc_norm_stderr": 0.046920083813689104
    },
    "harness|hendrycksTest-electrical_engineering|5": {
        "acc": 0.5310344827586206,
        "acc_stderr": 0.04158632762097828,
        "acc_norm": 0.5310344827586206,
        "acc_norm_stderr": 0.04158632762097828
    },
    "harness|hendrycksTest-elementary_mathematics|5": {
        "acc": 0.41534391534391535,
        "acc_stderr": 0.025379524910778405,
        "acc_norm": 0.41534391534391535,
        "acc_norm_stderr": 0.025379524910778405
    },
    "harness|hendrycksTest-formal_logic|5": {
        "acc": 0.46825396825396826,
        "acc_stderr": 0.04463112720677171,
        "acc_norm": 0.46825396825396826,
        "acc_norm_stderr": 0.04463112720677171
    },
    "harness|hendrycksTest-global_facts|5": {
        "acc": 0.35,
        "acc_stderr": 0.0479372485441102,
        "acc_norm": 0.35,
        "acc_norm_stderr": 0.0479372485441102
    },
    "harness|hendrycksTest-high_school_biology|5": {
        "acc": 0.7709677419354839,
        "acc_stderr": 0.023904914311782648,
        "acc_norm": 0.7709677419354839,
        "acc_norm_stderr": 0.023904914311782648
    },
    "harness|hendrycksTest-high_school_chemistry|5": {
        "acc": 0.5073891625615764,
        "acc_stderr": 0.035176035403610105,
        "acc_norm": 0.5073891625615764,
        "acc_norm_stderr": 0.035176035403610105
    },
    "harness|hendrycksTest-high_school_computer_science|5": {
        "acc": 0.69,
        "acc_stderr": 0.04648231987117316,
        "acc_norm": 0.69,
        "acc_norm_stderr": 0.04648231987117316
    },
    "harness|hendrycksTest-high_school_european_history|5": {
        "acc": 0.7757575757575758,
        "acc_stderr": 0.032568666616811015,
        "acc_norm": 0.7757575757575758,
        "acc_norm_stderr": 0.032568666616811015
    },
    "harness|hendrycksTest-high_school_geography|5": {
        "acc": 0.797979797979798,
        "acc_stderr": 0.028606204289229872,
        "acc_norm": 0.797979797979798,
        "acc_norm_stderr": 0.028606204289229872
    },
    "harness|hendrycksTest-high_school_government_and_politics|5": {
        "acc": 0.8963730569948186,
        "acc_stderr": 0.02199531196364424,
        "acc_norm": 0.8963730569948186,
        "acc_norm_stderr": 0.02199531196364424
    },
    "harness|hendrycksTest-high_school_macroeconomics|5": {
        "acc": 0.6641025641025641,
        "acc_stderr": 0.023946724741563973,
        "acc_norm": 0.6641025641025641,
        "acc_norm_stderr": 0.023946724741563973
    },
    "harness|hendrycksTest-high_school_mathematics|5": {
        "acc": 0.3888888888888889,
        "acc_stderr": 0.029723278961476664,
        "acc_norm": 0.3888888888888889,
        "acc_norm_stderr": 0.029723278961476664
    },
    "harness|hendrycksTest-high_school_microeconomics|5": {
        "acc": 0.680672268907563,
        "acc_stderr": 0.030283995525884396,
        "acc_norm": 0.680672268907563,
        "acc_norm_stderr": 0.030283995525884396
    },
    "harness|hendrycksTest-high_school_physics|5": {
        "acc": 0.3443708609271523,
        "acc_stderr": 0.038796870240733264,
        "acc_norm": 0.3443708609271523,
        "acc_norm_stderr": 0.038796870240733264
    },
    "harness|hendrycksTest-high_school_psychology|5": {
        "acc": 0.8422018348623853,
        "acc_stderr": 0.01563002297009245,
        "acc_norm": 0.8422018348623853,
        "acc_norm_stderr": 0.01563002297009245
    },
    "harness|hendrycksTest-high_school_statistics|5": {
        "acc": 0.5,
        "acc_stderr": 0.034099716973523674,
        "acc_norm": 0.5,
        "acc_norm_stderr": 0.034099716973523674
    },
    "harness|hendrycksTest-high_school_us_history|5": {
        "acc": 0.7990196078431373,
        "acc_stderr": 0.028125972265654366,
        "acc_norm": 0.7990196078431373,
        "acc_norm_stderr": 0.028125972265654366
    },
    "harness|hendrycksTest-high_school_world_history|5": {
        "acc": 0.7890295358649789,
        "acc_stderr": 0.02655837250266192,
        "acc_norm": 0.7890295358649789,
        "acc_norm_stderr": 0.02655837250266192
    },
    "harness|hendrycksTest-human_aging|5": {
        "acc": 0.695067264573991,
        "acc_stderr": 0.030898610882477515,
        "acc_norm": 0.695067264573991,
        "acc_norm_stderr": 0.030898610882477515
    },
    "harness|hendrycksTest-human_sexuality|5": {
        "acc": 0.7862595419847328,
        "acc_stderr": 0.0359546161177469,
        "acc_norm": 0.7862595419847328,
        "acc_norm_stderr": 0.0359546161177469
    },
    "harness|hendrycksTest-international_law|5": {
        "acc": 0.8016528925619835,
        "acc_stderr": 0.036401182719909476,
        "acc_norm": 0.8016528925619835,
        "acc_norm_stderr": 0.036401182719909476
    },
    "harness|hendrycksTest-jurisprudence|5": {
        "acc": 0.7870370370370371,
        "acc_stderr": 0.0395783547198098,
        "acc_norm": 0.7870370370370371,
        "acc_norm_stderr": 0.0395783547198098
    },
    "harness|hendrycksTest-logical_fallacies|5": {
        "acc": 0.754601226993865,
        "acc_stderr": 0.03380939813943354,
        "acc_norm": 0.754601226993865,
        "acc_norm_stderr": 0.03380939813943354
    },
    "harness|hendrycksTest-machine_learning|5": {
        "acc": 0.5535714285714286,
        "acc_stderr": 0.04718471485219587,
        "acc_norm": 0.5535714285714286,
        "acc_norm_stderr": 0.04718471485219587
    },
    "harness|hendrycksTest-management|5": {
        "acc": 0.7766990291262136,
        "acc_stderr": 0.04123553189891431,
        "acc_norm": 0.7766990291262136,
        "acc_norm_stderr": 0.04123553189891431
    },
    "harness|hendrycksTest-marketing|5": {
        "acc": 0.8760683760683761,
        "acc_stderr": 0.021586494001281376,
        "acc_norm": 0.8760683760683761,
        "acc_norm_stderr": 0.021586494001281376
    },
    "harness|hendrycksTest-medical_genetics|5": {
        "acc": 0.74,
        "acc_stderr": 0.04408440022768079,
        "acc_norm": 0.74,
        "acc_norm_stderr": 0.04408440022768079
    },
    "harness|hendrycksTest-miscellaneous|5": {
        "acc": 0.8186462324393359,
        "acc_stderr": 0.01377869377846408,
        "acc_norm": 0.8186462324393359,
        "acc_norm_stderr": 0.01377869377846408
    },
    "harness|hendrycksTest-moral_disputes|5": {
        "acc": 0.7225433526011561,
        "acc_stderr": 0.024105712607754307,
        "acc_norm": 0.7225433526011561,
        "acc_norm_stderr": 0.024105712607754307
    },
    "harness|hendrycksTest-moral_scenarios|5": {
        "acc": 0.288268156424581,
        "acc_stderr": 0.015149132860209432,
        "acc_norm": 0.288268156424581,
        "acc_norm_stderr": 0.015149132860209432
    },
    "harness|hendrycksTest-nutrition|5": {
        "acc": 0.7189542483660131,
        "acc_stderr": 0.025738854797818733,
        "acc_norm": 0.7189542483660131,
        "acc_norm_stderr": 0.025738854797818733
    },
    "harness|hendrycksTest-philosophy|5": {
        "acc": 0.7170418006430869,
        "acc_stderr": 0.025583062489984813,
        "acc_norm": 0.7170418006430869,
        "acc_norm_stderr": 0.025583062489984813
    },
    "harness|hendrycksTest-prehistory|5": {
        "acc": 0.7407407407407407,
        "acc_stderr": 0.024383665531035457,
        "acc_norm": 0.7407407407407407,
        "acc_norm_stderr": 0.024383665531035457
    },
    "harness|hendrycksTest-professional_accounting|5": {
        "acc": 0.5035460992907801,
        "acc_stderr": 0.02982674915328092,
        "acc_norm": 0.5035460992907801,
        "acc_norm_stderr": 0.02982674915328092
    },
    "harness|hendrycksTest-professional_law|5": {
        "acc": 0.4680573663624511,
        "acc_stderr": 0.012744149704869647,
        "acc_norm": 0.4680573663624511,
        "acc_norm_stderr": 0.012744149704869647
    },
    "harness|hendrycksTest-professional_medicine|5": {
        "acc": 0.6838235294117647,
        "acc_stderr": 0.028245687391462927,
        "acc_norm": 0.6838235294117647,
        "acc_norm_stderr": 0.028245687391462927
    },
    "harness|hendrycksTest-professional_psychology|5": {
        "acc": 0.6633986928104575,
        "acc_stderr": 0.019117213911495158,
        "acc_norm": 0.6633986928104575,
        "acc_norm_stderr": 0.019117213911495158
    },
    "harness|hendrycksTest-public_relations|5": {
        "acc": 0.6636363636363637,
        "acc_stderr": 0.04525393596302506,
        "acc_norm": 0.6636363636363637,
        "acc_norm_stderr": 0.04525393596302506
    },
    "harness|hendrycksTest-security_studies|5": {
        "acc": 0.7428571428571429,
        "acc_stderr": 0.027979823538744546,
        "acc_norm": 0.7428571428571429,
        "acc_norm_stderr": 0.027979823538744546
    },
    "harness|hendrycksTest-sociology|5": {
        "acc": 0.845771144278607,
        "acc_stderr": 0.025538433368578337,
        "acc_norm": 0.845771144278607,
        "acc_norm_stderr": 0.025538433368578337
    },
    "harness|hendrycksTest-us_foreign_policy|5": {
        "acc": 0.87,
        "acc_stderr": 0.033799766898963086,
        "acc_norm": 0.87,
        "acc_norm_stderr": 0.033799766898963086
    },
    "harness|hendrycksTest-virology|5": {
        "acc": 0.5301204819277109,
        "acc_stderr": 0.03885425420866767,
        "acc_norm": 0.5301204819277109,
        "acc_norm_stderr": 0.03885425420866767
    },
    "harness|hendrycksTest-world_religions|5": {
        "acc": 0.8128654970760234,
        "acc_stderr": 0.02991312723236804,
        "acc_norm": 0.8128654970760234,
        "acc_norm_stderr": 0.02991312723236804
    },
    "harness|truthfulqa:mc|0": {
        "mc1": 0.3353733170134639,
        "mc1_stderr": 0.01652753403966899,
        "mc2": 0.48804542326643174,
        "mc2_stderr": 0.015087630632446147
    },
    "harness|winogrande|5": {
        "acc": 0.8042620363062352,
        "acc_stderr": 0.011151145042218319
    },
    "harness|gsm8k|5": {
        "acc": 0.5860500379075056,
        "acc_stderr": 0.013566991960151778
    }
}
```

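These numbers come from Open LLM Leaderboard runs of lm-evaluation-harness; the `|25`, `|10`, `|5`, and `|0` suffixes in the task names are the few-shot counts used. If you want to sanity-check a single task locally, here is a sketch assuming a recent lm-evaluation-harness install; the leaderboard pins a specific harness version, so local scores may differ slightly:

```python
# Install lm-evaluation-harness (assumed package name on PyPI).
!pip install -qU lm-eval

# Re-run 25-shot ARC-Challenge, matching "harness|arc:challenge|25" above.
!lm_eval --model hf --model_args pretrained=Isaak-Carter/JOSIE_Beta-3-7B-slerp,dtype=bfloat16 --tasks arc_challenge --num_fewshot 25 --batch_size auto
```
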
## 🧩 Configuration

```yaml
slices:
  - sources:
      - model: Locutusque/Hercules-3.1-Mistral-7B
        layer_range: [0, 32]
      - model: cognitivecomputations/dolphin-2.8-experiment26-7b
        layer_range: [0, 32]
merge_method: slerp
base_model: Locutusque/Hercules-3.1-Mistral-7B
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
```

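To reproduce the merge locally, the configuration above can be passed to mergekit's CLI. A minimal sketch, assuming mergekit is installed from the arcee-ai repository; CLI flags can vary between mergekit versions:

```python
!pip install -qU git+https://github.com/arcee-ai/mergekit.git

# Write the merge configuration shown above to disk.
merge_config = """
slices:
  - sources:
      - model: Locutusque/Hercules-3.1-Mistral-7B
        layer_range: [0, 32]
      - model: cognitivecomputations/dolphin-2.8-experiment26-7b
        layer_range: [0, 32]
merge_method: slerp
base_model: Locutusque/Hercules-3.1-Mistral-7B
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
"""

with open("config.yaml", "w", encoding="utf-8") as f:
    f.write(merge_config)

# Run the merge; the merged model is written to ./merge.
!mergekit-yaml config.yaml merge --copy-tokenizer --lazy-unpickle
```
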
## 💻 Usage

```python
!pip install -qU transformers accelerate

from transformers import AutoTokenizer
import transformers
import torch

model = "Isaak-Carter/JOSIE_Beta-3-7B-slerp"
messages = [{"role": "user", "content": "What is a large language model?"}]

# Build the prompt with the model's chat template.
tokenizer = AutoTokenizer.from_pretrained(model)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Load the merged model in half precision and spread it across available devices.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```