RefalMachine committed on
Commit
d2dce0e
1 Parent(s): 83197a4

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. config.json +29 -0
  3. generation_config.json +11 -0
  4. llmtf_eval/daru_treewayabstractive.jsonl +0 -0
  5. llmtf_eval/daru_treewayabstractive_params.jsonl +58 -0
  6. llmtf_eval/daru_treewayabstractive_total.jsonl +8 -0
  7. llmtf_eval/daru_treewayextractive.jsonl +3 -0
  8. llmtf_eval/daru_treewayextractive_params.jsonl +58 -0
  9. llmtf_eval/daru_treewayextractive_total.jsonl +7 -0
  10. llmtf_eval/darumeru_MultiQ.jsonl +0 -0
  11. llmtf_eval/darumeru_MultiQ_params.jsonl +58 -0
  12. llmtf_eval/darumeru_MultiQ_total.jsonl +8 -0
  13. llmtf_eval/darumeru_PARus.jsonl +0 -0
  14. llmtf_eval/darumeru_PARus_params.jsonl +58 -0
  15. llmtf_eval/darumeru_PARus_total.jsonl +7 -0
  16. llmtf_eval/darumeru_RCB.jsonl +0 -0
  17. llmtf_eval/darumeru_RCB_params.jsonl +58 -0
  18. llmtf_eval/darumeru_RCB_total.jsonl +8 -0
  19. llmtf_eval/darumeru_RWSD.jsonl +0 -0
  20. llmtf_eval/darumeru_RWSD_params.jsonl +58 -0
  21. llmtf_eval/darumeru_RWSD_total.jsonl +7 -0
  22. llmtf_eval/darumeru_USE.jsonl +0 -0
  23. llmtf_eval/darumeru_USE_params.jsonl +58 -0
  24. llmtf_eval/darumeru_USE_total.jsonl +7 -0
  25. llmtf_eval/darumeru_cp_para_en.jsonl +0 -0
  26. llmtf_eval/darumeru_cp_para_en_params.jsonl +58 -0
  27. llmtf_eval/darumeru_cp_para_en_total.jsonl +9 -0
  28. llmtf_eval/darumeru_cp_para_ru.jsonl +0 -0
  29. llmtf_eval/darumeru_cp_para_ru_params.jsonl +58 -0
  30. llmtf_eval/darumeru_cp_para_ru_total.jsonl +9 -0
  31. llmtf_eval/darumeru_cp_sent_en.jsonl +0 -0
  32. llmtf_eval/darumeru_cp_sent_en_params.jsonl +58 -0
  33. llmtf_eval/darumeru_cp_sent_en_total.jsonl +9 -0
  34. llmtf_eval/darumeru_cp_sent_ru.jsonl +0 -0
  35. llmtf_eval/darumeru_cp_sent_ru_params.jsonl +58 -0
  36. llmtf_eval/darumeru_cp_sent_ru_total.jsonl +9 -0
  37. llmtf_eval/darumeru_ruMMLU.jsonl +3 -0
  38. llmtf_eval/darumeru_ruMMLU_params.jsonl +58 -0
  39. llmtf_eval/darumeru_ruMMLU_total.jsonl +7 -0
  40. llmtf_eval/darumeru_ruOpenBookQA.jsonl +0 -0
  41. llmtf_eval/darumeru_ruOpenBookQA_params.jsonl +58 -0
  42. llmtf_eval/darumeru_ruOpenBookQA_total.jsonl +8 -0
  43. llmtf_eval/darumeru_ruWorldTree.jsonl +0 -0
  44. llmtf_eval/darumeru_ruWorldTree_params.jsonl +58 -0
  45. llmtf_eval/darumeru_ruWorldTree_total.jsonl +8 -0
  46. llmtf_eval/evaluation_log.txt +267 -0
  47. llmtf_eval/evaluation_results.txt +2 -0
  48. llmtf_eval/nlpcoreteam_enMMLU.jsonl +3 -0
  49. llmtf_eval/nlpcoreteam_enMMLU_params.jsonl +58 -0
  50. llmtf_eval/nlpcoreteam_enMMLU_total.jsonl +7 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llmtf_eval/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ llmtf_eval/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ llmtf_eval/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ llmtf_eval/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/workdir/data/models/saiga_llama_extended_full_instruct_v2_conversion_v4_5e5_d7_copy_wiki_v2_merged_ablitirated",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 174561,
9
+ "eos_token_id": 174562,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 14336,
14
+ "max_position_embeddings": 8192,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 32,
19
+ "num_key_value_heads": 8,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 500000.0,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.42.4",
27
+ "use_cache": true,
28
+ "vocab_size": 174817
29
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 174561,
3
+ "do_sample": true,
4
+ "eos_token_id": 174570,
5
+ "max_length": 4096,
6
+ "repetition_penalty": 1.05,
7
+ "temperature": 0.3,
8
+ "top_k": 40,
9
+ "top_p": 0.9,
10
+ "transformers_version": "4.42.4"
11
+ }
llmtf_eval/daru_treewayabstractive.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/daru_treewayabstractive_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 512,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 500,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/daru_treewayabstractive_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayabstractive",
3
+ "results": {
4
+ "rouge1": 0.3550759319426331,
5
+ "rouge2": 0.12663323877762525
6
+ },
7
+ "leaderboard_result": 0.24085458536012916
8
+ }
llmtf_eval/daru_treewayextractive.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3732197c9cbd40db709e5cebfb9181260d867a06b8bc67f3d454e7866aacf8e1
3
+ size 225237835
llmtf_eval/daru_treewayextractive_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 1,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_logsoftmax"
57
+ }
58
+ }
llmtf_eval/daru_treewayextractive_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayextractive",
3
+ "results": {
4
+ "r-prec": 0.4306020202020202
5
+ },
6
+ "leaderboard_result": 0.4306020202020202
7
+ }
llmtf_eval/darumeru_MultiQ.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_MultiQ_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/darumeru_MultiQ_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/MultiQ",
3
+ "results": {
4
+ "f1": 0.2660316706577536,
5
+ "em": 0.15487571701720843
6
+ },
7
+ "leaderboard_result": 0.21045369383748103
8
+ }
llmtf_eval/darumeru_PARus.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_PARus_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/darumeru_PARus_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/PARus",
3
+ "results": {
4
+ "acc": 0.61
5
+ },
6
+ "leaderboard_result": 0.61
7
+ }
llmtf_eval/darumeru_RCB.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_RCB_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/darumeru_RCB_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RCB",
3
+ "results": {
4
+ "acc": 0.4590909090909091,
5
+ "f1_macro": 0.41511023060616065
6
+ },
7
+ "leaderboard_result": 0.43710056984853485
8
+ }
llmtf_eval/darumeru_RWSD.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_RWSD_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/darumeru_RWSD_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RWSD",
3
+ "results": {
4
+ "acc": 0.5686274509803921
5
+ },
6
+ "leaderboard_result": 0.5686274509803921
7
+ }
llmtf_eval/darumeru_USE.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_USE_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/darumeru_USE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/USE",
3
+ "results": {
4
+ "grade_norm": 0.0931372549019608
5
+ },
6
+ "leaderboard_result": 0.0931372549019608
7
+ }
llmtf_eval/darumeru_cp_para_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_cp_para_en_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 1024,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/darumeru_cp_para_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_en",
3
+ "results": {
4
+ "symbol_per_token": 4.521261233892721,
5
+ "len": 0.9996383092887717,
6
+ "lcs": 1.0
7
+ },
8
+ "leaderboard_result": 1.0
9
+ }
llmtf_eval/darumeru_cp_para_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_cp_para_ru_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 1024,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/darumeru_cp_para_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_ru",
3
+ "results": {
4
+ "symbol_per_token": 3.7865670306157933,
5
+ "len": 0.9983390643026695,
6
+ "lcs": 0.97
7
+ },
8
+ "leaderboard_result": 0.97
9
+ }
llmtf_eval/darumeru_cp_sent_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_cp_sent_en_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 128,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/darumeru_cp_sent_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_en",
3
+ "results": {
4
+ "symbol_per_token": 4.394786509645572,
5
+ "len": 0.9984205596649908,
6
+ "lcs": 0.9939024390243902
7
+ },
8
+ "leaderboard_result": 0.9984205596649908
9
+ }
llmtf_eval/darumeru_cp_sent_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_cp_sent_ru_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 128,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "generate"
57
+ }
58
+ }
llmtf_eval/darumeru_cp_sent_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_ru",
3
+ "results": {
4
+ "symbol_per_token": 3.705603375484548,
5
+ "len": 0.9949612573353719,
6
+ "lcs": 0.9404517453798767
7
+ },
8
+ "leaderboard_result": 0.9949612573353719
9
+ }
llmtf_eval/darumeru_ruMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8fb11c0eaae56d36334195b05ca69a7d57a239fc0887c2c1da284353ba78897
3
+ size 32888497
llmtf_eval/darumeru_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/darumeru_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5018457547640427
5
+ },
6
+ "leaderboard_result": 0.5018457547640427
7
+ }
llmtf_eval/darumeru_ruOpenBookQA.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_ruOpenBookQA_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/darumeru_ruOpenBookQA_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruOpenBookQA",
3
+ "results": {
4
+ "acc": 0.7246563573883161,
5
+ "f1_macro": 0.7254261079279148
6
+ },
7
+ "leaderboard_result": 0.7250412326581155
8
+ }
llmtf_eval/darumeru_ruWorldTree.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval/darumeru_ruWorldTree_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/darumeru_ruWorldTree_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruWorldTree",
3
+ "results": {
4
+ "acc": 0.8666666666666667,
5
+ "f1_macro": 0.8640387481371088
6
+ },
7
+ "leaderboard_result": 0.8653527074018877
8
+ }
llmtf_eval/evaluation_log.txt ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO: 2024-08-28 09:38:05,098: llmtf.base.evaluator: Starting eval on ['darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/ruworldtree', 'darumeru/rwsd', 'russiannlp/rucola_custom']
2
+ INFO: 2024-08-28 09:38:05,099: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
3
+ INFO: 2024-08-28 09:38:05,099: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
4
+ INFO: 2024-08-28 09:38:07,960: llmtf.base.darumeru/PARus: Loading Dataset: 2.86s
5
+ INFO: 2024-08-28 09:38:07,992: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu', 'daru/treewayextractive']
6
+ INFO: 2024-08-28 09:38:07,992: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
7
+ INFO: 2024-08-28 09:38:07,992: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
8
+ INFO: 2024-08-28 09:38:09,381: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu', 'nlpcoreteam/enmmlu']
9
+ INFO: 2024-08-28 09:38:09,381: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
10
+ INFO: 2024-08-28 09:38:09,381: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
11
+ INFO: 2024-08-28 09:38:11,476: llmtf.base.darumeru/PARus: Processing Dataset: 3.52s
12
+ INFO: 2024-08-28 09:38:11,476: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
13
+ INFO: 2024-08-28 09:38:11,487: llmtf.base.darumeru/PARus: {'acc': 0.61}
14
+ INFO: 2024-08-28 09:38:11,487: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
15
+ INFO: 2024-08-28 09:38:11,487: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
16
+ INFO: 2024-08-28 09:38:13,100: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
17
+ INFO: 2024-08-28 09:38:13,100: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
18
+ INFO: 2024-08-28 09:38:13,100: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
19
+ INFO: 2024-08-28 09:38:13,501: llmtf.base.darumeru/RCB: Loading Dataset: 2.01s
20
+ INFO: 2024-08-28 09:38:13,809: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/use']
21
+ INFO: 2024-08-28 09:38:13,809: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
22
+ INFO: 2024-08-28 09:38:13,809: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
23
+ INFO: 2024-08-28 09:38:15,664: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_para_ru']
24
+ INFO: 2024-08-28 09:38:15,664: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
25
+ INFO: 2024-08-28 09:38:15,664: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
26
+ INFO: 2024-08-28 09:38:16,575: llmtf.base.darumeru/ruMMLU: Loading Dataset: 8.58s
27
+ INFO: 2024-08-28 09:38:17,521: llmtf.base.darumeru/MultiQ: Loading Dataset: 3.71s
28
+ INFO: 2024-08-28 09:38:17,560: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.46s
29
+ INFO: 2024-08-28 09:38:17,771: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_en', 'darumeru/cp_para_en']
30
+ INFO: 2024-08-28 09:38:17,772: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
31
+ INFO: 2024-08-28 09:38:17,772: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
32
+ INFO: 2024-08-28 09:38:18,163: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 2.50s
33
+ INFO: 2024-08-28 09:38:18,968: llmtf.base.darumeru/RCB: Processing Dataset: 5.47s
34
+ INFO: 2024-08-28 09:38:18,968: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
35
+ INFO: 2024-08-28 09:38:18,971: llmtf.base.darumeru/RCB: {'acc': 0.4590909090909091, 'f1_macro': 0.41511023060616065}
36
+ INFO: 2024-08-28 09:38:18,972: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
37
+ INFO: 2024-08-28 09:38:18,972: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
38
+ INFO: 2024-08-28 09:38:20,244: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 2.47s
39
+ INFO: 2024-08-28 09:38:21,566: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 2.59s
40
+ INFO: 2024-08-28 09:38:59,395: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 37.83s
41
+ INFO: 2024-08-28 09:38:59,396: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
42
+ INFO: 2024-08-28 09:38:59,405: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.7246563573883161, 'f1_macro': 0.7254261079279148}
43
+ INFO: 2024-08-28 09:38:59,412: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
44
+ INFO: 2024-08-28 09:38:59,413: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
45
+ INFO: 2024-08-28 09:39:01,164: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 1.75s
46
+ INFO: 2024-08-28 09:39:03,177: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 2.01s
47
+ INFO: 2024-08-28 09:39:03,177: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
48
+ INFO: 2024-08-28 09:39:03,179: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8666666666666667, 'f1_macro': 0.8640387481371088}
49
+ INFO: 2024-08-28 09:39:03,179: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
50
+ INFO: 2024-08-28 09:39:03,180: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
51
+ INFO: 2024-08-28 09:39:05,009: llmtf.base.darumeru/RWSD: Loading Dataset: 1.83s
52
+ INFO: 2024-08-28 09:39:10,969: llmtf.base.darumeru/RWSD: Processing Dataset: 5.96s
53
+ INFO: 2024-08-28 09:39:10,969: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
54
+ INFO: 2024-08-28 09:39:10,970: llmtf.base.darumeru/RWSD: {'acc': 0.5686274509803921}
55
+ INFO: 2024-08-28 09:39:10,971: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
56
+ INFO: 2024-08-28 09:39:10,971: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
57
+ INFO: 2024-08-28 09:39:14,992: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 4.02s
58
+ INFO: 2024-08-28 09:39:58,707: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 43.71s
59
+ INFO: 2024-08-28 09:39:58,707: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
60
+ INFO: 2024-08-28 09:39:58,716: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7100825260136348, 'mcc': 0.2607217783495962}
61
+ INFO: 2024-08-28 09:39:58,720: llmtf.base.evaluator: Ended eval
62
+ INFO: 2024-08-28 09:39:58,720: llmtf.base.evaluator:
63
+ mean darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/ruOpenBookQA darumeru/ruWorldTree russiannlp/rucola_custom
64
+ 0.615 0.610 0.437 0.569 0.725 0.865 0.485
65
+ INFO: 2024-08-28 09:41:10,442: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 170.20s
66
+ INFO: 2024-08-28 09:41:10,443: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
67
+ INFO: 2024-08-28 09:41:10,444: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.394786509645572, 'len': 0.9984205596649908, 'lcs': 0.9939024390243902}
68
+ INFO: 2024-08-28 09:41:10,445: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
69
+ INFO: 2024-08-28 09:41:10,445: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
70
+ INFO: 2024-08-28 09:41:12,462: llmtf.base.darumeru/cp_para_en: Loading Dataset: 2.02s
71
+ INFO: 2024-08-28 09:41:15,580: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 177.42s
72
+ INFO: 2024-08-28 09:41:15,580: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
73
+ INFO: 2024-08-28 09:41:15,581: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 3.705603375484548, 'len': 0.9949612573353719, 'lcs': 0.9404517453798767}
74
+ INFO: 2024-08-28 09:41:15,582: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
75
+ INFO: 2024-08-28 09:41:15,582: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
76
+ INFO: 2024-08-28 09:41:17,523: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 1.94s
77
+ INFO: 2024-08-28 09:43:35,734: llmtf.base.darumeru/cp_para_en: Processing Dataset: 143.27s
78
+ INFO: 2024-08-28 09:43:35,734: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
79
+ INFO: 2024-08-28 09:43:35,735: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.521261233892721, 'len': 0.9996383092887717, 'lcs': 1.0}
80
+ INFO: 2024-08-28 09:43:35,735: llmtf.base.evaluator: Ended eval
81
+ INFO: 2024-08-28 09:43:35,736: llmtf.base.evaluator:
82
+ mean darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruOpenBookQA darumeru/ruWorldTree russiannlp/rucola_custom
83
+ 0.743 0.610 0.437 0.569 1.000 0.998 0.995 0.725 0.865 0.485
84
+ INFO: 2024-08-28 09:43:52,340: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 342.96s
85
+ INFO: 2024-08-28 09:43:56,137: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 158.61s
86
+ INFO: 2024-08-28 09:43:56,138: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
87
+ INFO: 2024-08-28 09:43:56,138: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 3.7865670306157933, 'len': 0.9983390643026695, 'lcs': 0.97}
88
+ INFO: 2024-08-28 09:43:56,139: llmtf.base.evaluator: Ended eval
89
+ INFO: 2024-08-28 09:43:56,139: llmtf.base.evaluator:
90
+ mean darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruOpenBookQA darumeru/ruWorldTree russiannlp/rucola_custom
91
+ 0.765 0.610 0.437 0.569 1.000 0.970 0.998 0.995 0.725 0.865 0.485
92
+ INFO: 2024-08-28 09:44:12,518: llmtf.base.darumeru/ruMMLU: Processing Dataset: 355.94s
93
+ INFO: 2024-08-28 09:44:12,518: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
94
+ INFO: 2024-08-28 09:44:12,524: llmtf.base.darumeru/ruMMLU: {'acc': 0.5018457547640427}
95
+ INFO: 2024-08-28 09:44:12,564: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
96
+ INFO: 2024-08-28 09:44:12,564: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
97
+ INFO: 2024-08-28 09:44:25,707: llmtf.base.daru/treewayextractive: Loading Dataset: 13.14s
98
+ INFO: 2024-08-28 09:44:33,920: llmtf.base.darumeru/MultiQ: Processing Dataset: 376.40s
99
+ INFO: 2024-08-28 09:44:33,920: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
100
+ INFO: 2024-08-28 09:44:33,922: llmtf.base.darumeru/MultiQ: {'f1': 0.2660316706577536, 'em': 0.15487571701720843}
101
+ INFO: 2024-08-28 09:44:33,927: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
102
+ INFO: 2024-08-28 09:44:33,927: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
103
+ INFO: 2024-08-28 09:44:37,006: llmtf.base.darumeru/USE: Loading Dataset: 3.08s
104
+ INFO: 2024-08-28 09:45:49,905: llmtf.base.daru/treewayabstractive: Processing Dataset: 452.34s
105
+ INFO: 2024-08-28 09:45:49,905: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
106
+ INFO: 2024-08-28 09:45:49,906: llmtf.base.daru/treewayabstractive: {'rouge1': 0.3550759319426331, 'rouge2': 0.12663323877762525}
107
+ INFO: 2024-08-28 09:45:49,909: llmtf.base.evaluator: Ended eval
108
+ INFO: 2024-08-28 09:45:49,910: llmtf.base.evaluator:
109
+ mean daru/treewayabstractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruWorldTree russiannlp/rucola_custom
110
+ 0.662 0.241 0.210 0.610 0.437 0.569 1.000 0.970 0.998 0.995 0.502 0.725 0.865 0.485
111
+ INFO: 2024-08-28 09:48:00,114: llmtf.base.darumeru/USE: Processing Dataset: 203.11s
112
+ INFO: 2024-08-28 09:48:00,115: llmtf.base.darumeru/USE: Results for darumeru/USE:
113
+ INFO: 2024-08-28 09:48:00,116: llmtf.base.darumeru/USE: {'grade_norm': 0.0931372549019608}
114
+ INFO: 2024-08-28 09:48:00,119: llmtf.base.evaluator: Ended eval
115
+ INFO: 2024-08-28 09:48:00,119: llmtf.base.evaluator:
116
+ mean daru/treewayabstractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruWorldTree russiannlp/rucola_custom
117
+ 0.622 0.241 0.210 0.610 0.437 0.569 0.093 1.000 0.970 0.998 0.995 0.502 0.725 0.865 0.485
118
+ INFO: 2024-08-28 09:49:59,058: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 366.72s
119
+ INFO: 2024-08-28 09:49:59,059: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
120
+ INFO: 2024-08-28 09:49:59,121: llmtf.base.nlpcoreteam/ruMMLU: metric
121
+ subject
122
+ abstract_algebra 0.270000
123
+ anatomy 0.400000
124
+ astronomy 0.677632
125
+ business_ethics 0.560000
126
+ clinical_knowledge 0.584906
127
+ college_biology 0.506944
128
+ college_chemistry 0.420000
129
+ college_computer_science 0.430000
130
+ college_mathematics 0.310000
131
+ college_medicine 0.549133
132
+ college_physics 0.352941
133
+ computer_security 0.530000
134
+ conceptual_physics 0.493617
135
+ econometrics 0.377193
136
+ electrical_engineering 0.503448
137
+ elementary_mathematics 0.362434
138
+ formal_logic 0.404762
139
+ global_facts 0.310000
140
+ high_school_biology 0.674194
141
+ high_school_chemistry 0.413793
142
+ high_school_computer_science 0.620000
143
+ high_school_european_history 0.709091
144
+ high_school_geography 0.686869
145
+ high_school_government_and_politics 0.616580
146
+ high_school_macroeconomics 0.507692
147
+ high_school_mathematics 0.355556
148
+ high_school_microeconomics 0.516807
149
+ high_school_physics 0.377483
150
+ high_school_psychology 0.684404
151
+ high_school_statistics 0.467593
152
+ high_school_us_history 0.686275
153
+ high_school_world_history 0.725738
154
+ human_aging 0.529148
155
+ human_sexuality 0.610687
156
+ international_law 0.652893
157
+ jurisprudence 0.601852
158
+ logical_fallacies 0.509202
159
+ machine_learning 0.330357
160
+ management 0.669903
161
+ marketing 0.722222
162
+ medical_genetics 0.550000
163
+ miscellaneous 0.627075
164
+ moral_disputes 0.572254
165
+ moral_scenarios 0.218994
166
+ nutrition 0.601307
167
+ philosophy 0.598071
168
+ prehistory 0.533951
169
+ professional_accounting 0.382979
170
+ professional_law 0.355280
171
+ professional_medicine 0.533088
172
+ professional_psychology 0.486928
173
+ public_relations 0.563636
174
+ security_studies 0.616327
175
+ sociology 0.726368
176
+ us_foreign_policy 0.730000
177
+ virology 0.463855
178
+ world_religions 0.678363
179
+ INFO: 2024-08-28 09:49:59,130: llmtf.base.nlpcoreteam/ruMMLU: metric
180
+ subject
181
+ STEM 0.449777
182
+ humanities 0.557440
183
+ other (business, health, misc.) 0.534544
184
+ social sciences 0.593624
185
+ INFO: 2024-08-28 09:49:59,135: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.53384650825764}
186
+ INFO: 2024-08-28 09:49:59,170: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [174570]
187
+ INFO: 2024-08-28 09:49:59,170: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|eot_id|>']
188
+ INFO: 2024-08-28 09:50:43,640: llmtf.base.daru/treewayextractive: Processing Dataset: 377.93s
189
+ INFO: 2024-08-28 09:50:43,640: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
190
+ INFO: 2024-08-28 09:50:43,882: llmtf.base.daru/treewayextractive: {'r-prec': 0.4306020202020202}
191
+ INFO: 2024-08-28 09:50:43,926: llmtf.base.evaluator: Ended eval
192
+ INFO: 2024-08-28 09:50:43,927: llmtf.base.evaluator:
193
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruWorldTree nlpcoreteam/ruMMLU russiannlp/rucola_custom
194
+ 0.604 0.241 0.431 0.210 0.610 0.437 0.569 0.093 1.000 0.970 0.998 0.995 0.502 0.725 0.865 0.534 0.485
195
+ INFO: 2024-08-28 09:51:38,740: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 99.57s
196
+ INFO: 2024-08-28 09:56:59,217: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 320.48s
197
+ INFO: 2024-08-28 09:56:59,217: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
198
+ INFO: 2024-08-28 09:56:59,279: llmtf.base.nlpcoreteam/enMMLU: metric
199
+ subject
200
+ abstract_algebra 0.310000
201
+ anatomy 0.622222
202
+ astronomy 0.723684
203
+ business_ethics 0.600000
204
+ clinical_knowledge 0.720755
205
+ college_biology 0.777778
206
+ college_chemistry 0.460000
207
+ college_computer_science 0.550000
208
+ college_mathematics 0.390000
209
+ college_medicine 0.647399
210
+ college_physics 0.421569
211
+ computer_security 0.780000
212
+ conceptual_physics 0.561702
213
+ econometrics 0.412281
214
+ electrical_engineering 0.572414
215
+ elementary_mathematics 0.447090
216
+ formal_logic 0.523810
217
+ global_facts 0.440000
218
+ high_school_biology 0.803226
219
+ high_school_chemistry 0.512315
220
+ high_school_computer_science 0.710000
221
+ high_school_european_history 0.727273
222
+ high_school_geography 0.772727
223
+ high_school_government_and_politics 0.849741
224
+ high_school_macroeconomics 0.661538
225
+ high_school_mathematics 0.374074
226
+ high_school_microeconomics 0.747899
227
+ high_school_physics 0.417219
228
+ high_school_psychology 0.840367
229
+ high_school_statistics 0.611111
230
+ high_school_us_history 0.789216
231
+ high_school_world_history 0.818565
232
+ human_aging 0.677130
233
+ human_sexuality 0.763359
234
+ international_law 0.743802
235
+ jurisprudence 0.777778
236
+ logical_fallacies 0.773006
237
+ machine_learning 0.464286
238
+ management 0.825243
239
+ marketing 0.854701
240
+ medical_genetics 0.760000
241
+ miscellaneous 0.817369
242
+ moral_disputes 0.664740
243
+ moral_scenarios 0.448045
244
+ nutrition 0.735294
245
+ philosophy 0.678457
246
+ prehistory 0.682099
247
+ professional_accounting 0.524823
248
+ professional_law 0.453716
249
+ professional_medicine 0.764706
250
+ professional_psychology 0.643791
251
+ public_relations 0.654545
252
+ security_studies 0.685714
253
+ sociology 0.830846
254
+ us_foreign_policy 0.820000
255
+ virology 0.500000
256
+ world_religions 0.789474
257
+ INFO: 2024-08-28 09:56:59,286: llmtf.base.nlpcoreteam/enMMLU: metric
258
+ subject
259
+ STEM 0.549248
260
+ humanities 0.682306
261
+ other (business, health, misc.) 0.677832
262
+ social sciences 0.723567
263
+ INFO: 2024-08-28 09:56:59,291: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6582382725054904}
264
+ INFO: 2024-08-28 09:56:59,323: llmtf.base.evaluator: Ended eval
265
+ INFO: 2024-08-28 09:56:59,325: llmtf.base.evaluator:
266
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
267
+ 0.607 0.241 0.431 0.210 0.610 0.437 0.569 0.093 1.000 0.970 0.998 0.995 0.502 0.725 0.865 0.658 0.534 0.485
llmtf_eval/evaluation_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.607 0.241 0.431 0.210 0.610 0.437 0.569 0.093 1.000 0.970 0.998 0.995 0.502 0.725 0.865 0.658 0.534 0.485
llmtf_eval/nlpcoreteam_enMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7e1e13850087dacbdeb63e857309fcf93dd2e4c891c65dfa5a6e1172e1ac9a4
3
+ size 38064252
llmtf_eval/nlpcoreteam_enMMLU_params.jsonl ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "/workdir/data/models/ruadapt_llama_saiga_kto_ablitirated_merged",
5
+ "generation_config": {
6
+ "bos_token_id": 174561,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 174570
10
+ ],
11
+ "max_length": 8192,
12
+ "max_new_tokens": 64,
13
+ "pad_token_id": 174561,
14
+ "stop_strings": [
15
+ "<|eot_id|>"
16
+ ],
17
+ "temperature": 0.1,
18
+ "top_k": 40,
19
+ "top_p": 0.9,
20
+ "transformers_version": "4.42.4",
21
+ "trust_remote_code": [
22
+ false
23
+ ]
24
+ },
25
+ "conversation_template": {
26
+ "system_prompt": "",
27
+ "system_message_template": "",
28
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
30
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
31
+ "user_role": "user",
32
+ "bot_role": "assistant",
33
+ "system_role": "system",
34
+ "global_prefix": "<|begin_of_text|>",
35
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
36
+ "add_special_tokens": false,
37
+ "eos_token": "<|eot_id|>"
38
+ },
39
+ "load_in_8bit": false,
40
+ "torch_dtype": "auto",
41
+ "use_flash_attention_2": true,
42
+ "device_map": "cuda:0",
43
+ "use_fast_tokenizer": true,
44
+ "leading_space": false,
45
+ "space_token": null,
46
+ "trust_remote_code": [
47
+ false
48
+ ],
49
+ "max_model_len": 8192
50
+ },
51
+ "task_params": {
52
+ "max_len": 4000,
53
+ "few_shot_count": 0,
54
+ "batch_size": 8,
55
+ "max_sample_per_dataset": 10000000000000,
56
+ "method": "calculate_tokens_proba"
57
+ }
58
+ }
llmtf_eval/nlpcoreteam_enMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "nlpcoreteam/enMMLU",
3
+ "results": {
4
+ "acc": 0.6582382725054904
5
+ },
6
+ "leaderboard_result": 0.6582382725054904
7
+ }