Alex Spangher
committed on
Commit
•
0219ce0
1
Parent(s):
e3b4dcc
updated
Browse files
- all_results.json +14 -0
- config.json +125 -0
- model.safetensors +3 -0
- post-training eval_results.json +8 -0
- prediction_output.jsonl +1 -0
- train_results.json +8 -0
- trainer_state.json +97 -0
- training_args.bin +3 -0
all_results.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 3.0,
|
3 |
+
"eval_samples": 34,
|
4 |
+
"test_f1": 0.6595240802735967,
|
5 |
+
"test_loss": 0.6290570497512817,
|
6 |
+
"test_runtime": 2.9031,
|
7 |
+
"test_samples_per_second": 11.712,
|
8 |
+
"test_steps_per_second": 11.712,
|
9 |
+
"train_loss": 0.5280582647873345,
|
10 |
+
"train_runtime": 227.8039,
|
11 |
+
"train_samples": 266,
|
12 |
+
"train_samples_per_second": 3.503,
|
13 |
+
"train_steps_per_second": 3.503
|
14 |
+
}
|
config.json
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "roberta-base",
|
3 |
+
"architectures": [
|
4 |
+
"SentenceClassificationModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classification_head": {
|
9 |
+
"num_labels": 6,
|
10 |
+
"pooling_method": "attention"
|
11 |
+
},
|
12 |
+
"classifier_dropout": null,
|
13 |
+
"context_config": {
|
14 |
+
"_name_or_path": "roberta-base",
|
15 |
+
"add_cross_attention": false,
|
16 |
+
"architectures": [
|
17 |
+
"RobertaForMaskedLM"
|
18 |
+
],
|
19 |
+
"attention_probs_dropout_prob": 0.1,
|
20 |
+
"bad_words_ids": null,
|
21 |
+
"begin_suppress_tokens": null,
|
22 |
+
"bos_token_id": 0,
|
23 |
+
"chunk_size_feed_forward": 0,
|
24 |
+
"classifier_dropout": null,
|
25 |
+
"cross_attention_hidden_size": null,
|
26 |
+
"decoder_start_token_id": null,
|
27 |
+
"diversity_penalty": 0.0,
|
28 |
+
"do_sample": false,
|
29 |
+
"early_stopping": false,
|
30 |
+
"encoder_no_repeat_ngram_size": 0,
|
31 |
+
"eos_token_id": 2,
|
32 |
+
"exponential_decay_length_penalty": null,
|
33 |
+
"finetuning_task": null,
|
34 |
+
"forced_bos_token_id": null,
|
35 |
+
"forced_eos_token_id": null,
|
36 |
+
"hidden_act": "gelu",
|
37 |
+
"hidden_dropout_prob": 0.1,
|
38 |
+
"hidden_size": 768,
|
39 |
+
"id2label": {
|
40 |
+
"0": "LABEL_0",
|
41 |
+
"1": "LABEL_1"
|
42 |
+
},
|
43 |
+
"initializer_range": 0.02,
|
44 |
+
"intermediate_size": 3072,
|
45 |
+
"is_decoder": false,
|
46 |
+
"is_encoder_decoder": false,
|
47 |
+
"label2id": {
|
48 |
+
"LABEL_0": 0,
|
49 |
+
"LABEL_1": 1
|
50 |
+
},
|
51 |
+
"layer_norm_eps": 1e-05,
|
52 |
+
"length_penalty": 1.0,
|
53 |
+
"max_length": 20,
|
54 |
+
"max_position_embeddings": 120,
|
55 |
+
"min_length": 0,
|
56 |
+
"model_type": "roberta",
|
57 |
+
"no_repeat_ngram_size": 0,
|
58 |
+
"num_attention_heads": 2,
|
59 |
+
"num_beam_groups": 1,
|
60 |
+
"num_beams": 1,
|
61 |
+
"num_hidden_layers": 2,
|
62 |
+
"num_return_sequences": 1,
|
63 |
+
"output_attentions": false,
|
64 |
+
"output_hidden_states": false,
|
65 |
+
"output_scores": false,
|
66 |
+
"pad_token_id": 1,
|
67 |
+
"position_embedding_type": "absolute",
|
68 |
+
"prefix": null,
|
69 |
+
"problem_type": null,
|
70 |
+
"pruned_heads": {},
|
71 |
+
"remove_invalid_values": false,
|
72 |
+
"repetition_penalty": 1.0,
|
73 |
+
"return_dict": true,
|
74 |
+
"return_dict_in_generate": false,
|
75 |
+
"sep_token_id": null,
|
76 |
+
"suppress_tokens": null,
|
77 |
+
"task_specific_params": null,
|
78 |
+
"temperature": 1.0,
|
79 |
+
"tf_legacy_loss": false,
|
80 |
+
"tie_encoder_decoder": false,
|
81 |
+
"tie_word_embeddings": true,
|
82 |
+
"tokenizer_class": null,
|
83 |
+
"top_k": 50,
|
84 |
+
"top_p": 1.0,
|
85 |
+
"torch_dtype": null,
|
86 |
+
"torchscript": false,
|
87 |
+
"transformers_version": "4.35.0",
|
88 |
+
"type_vocab_size": 1,
|
89 |
+
"typical_p": 1.0,
|
90 |
+
"use_bfloat16": false,
|
91 |
+
"use_cache": true,
|
92 |
+
"vocab_size": 50265
|
93 |
+
},
|
94 |
+
"context_layer": "transformer",
|
95 |
+
"eos_token_id": 2,
|
96 |
+
"frozen_layers": [
|
97 |
+
0,
|
98 |
+
1,
|
99 |
+
2,
|
100 |
+
3,
|
101 |
+
4,
|
102 |
+
5,
|
103 |
+
6,
|
104 |
+
7,
|
105 |
+
8,
|
106 |
+
9
|
107 |
+
],
|
108 |
+
"hidden_act": "gelu",
|
109 |
+
"hidden_dropout_prob": 0.1,
|
110 |
+
"hidden_size": 768,
|
111 |
+
"initializer_range": 0.02,
|
112 |
+
"intermediate_size": 3072,
|
113 |
+
"layer_norm_eps": 1e-05,
|
114 |
+
"max_position_embeddings": 514,
|
115 |
+
"model_type": "roberta",
|
116 |
+
"num_attention_heads": 12,
|
117 |
+
"num_hidden_layers": 12,
|
118 |
+
"pad_token_id": 1,
|
119 |
+
"position_embedding_type": "absolute",
|
120 |
+
"torch_dtype": "float32",
|
121 |
+
"transformers_version": "4.35.0",
|
122 |
+
"type_vocab_size": 1,
|
123 |
+
"use_cache": true,
|
124 |
+
"vocab_size": 50265
|
125 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0a8943cd3267c1698d67425db3b39331dd0ad2a57c5e59932707d8024fcfc07
|
3 |
+
size 714853440
|
post-training eval_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_samples": 34,
|
3 |
+
"test_f1": 0.6595240802735967,
|
4 |
+
"test_loss": 0.6290570497512817,
|
5 |
+
"test_runtime": 2.9031,
|
6 |
+
"test_samples_per_second": 11.712,
|
7 |
+
"test_steps_per_second": 11.712
|
8 |
+
}
|
prediction_output.jsonl
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pred": 7.805722713470459, "label": 0.0}, {"pred": -1.3506264686584473, "label": 4.0}, {"pred": -2.723374843597412, "label": 5.0}, {"pred": -1.896144986152649, "label": 4.0}, {"pred": -0.7635705471038818, "label": 3.0}, {"pred": -2.15873646736145, "label": 4.0}, {"pred": -1.2330230474472046, "label": 4.0}, {"pred": 0.00858240481466055, "label": 4.0}, {"pred": -3.58148455619812, "label": 2.0}, {"pred": -1.1138194799423218, "label": 2.0}, {"pred": 6.225915431976318, "label": 1.0}, {"pred": -0.36288589239120483, "label": 4.0}, {"pred": -2.230883836746216, "label": 4.0}, {"pred": 0.5709546804428101, "label": 3.0}, {"pred": -3.3159286975860596, "label": 1.0}, {"pred": -1.163223147392273, "label": 1.0}, {"pred": 6.794768810272217, "label": 4.0}, {"pred": 0.06500926613807678, "label": 4.0}, {"pred": -2.9693849086761475, "label": 1.0}, {"pred": 3.4366679191589355, "label": 1.0}, {"pred": -2.013765573501587, "label": 4.0}, {"pred": -1.7807642221450806, "label": 4.0}, {"pred": 4.4118475914001465, "label": 4.0}, {"pred": -0.10153692960739136, "label": 1.0}, {"pred": -2.5499534606933594, "label": 4.0}, {"pred": 1.1678745746612549, "label": 4.0}, {"pred": -1.8997702598571777, "label": 0.0}, {"pred": 6.152190685272217, "label": 0.0}, {"pred": 0.9506238698959351, "label": 1.0}, {"pred": -1.8455047607421875, "label": 3.0}, {"pred": -2.5474693775177, "label": 3.0}, {"pred": 1.6176929473876953, "label": 4.0}, {"pred": -1.6770298480987549, "label": 4.0}, {"pred": -1.8495861291885376, "label": 4.0}, {"pred": 5.906722068786621, "label": 4.0}, {"pred": -0.9954162240028381, "label": 5.0}, {"pred": -1.280759572982788, "label": 1.0}, {"pred": -1.7175190448760986, "label": 1.0}, {"pred": -3.5523223876953125, "label": 4.0}, {"pred": -1.1674764156341553, "label": 4.0}, {"pred": 7.825597286224365, "label": 4.0}, {"pred": -0.48608145117759705, "label": 4.0}, {"pred": 2.463496685028076, "label": 4.0}, {"pred": -0.5277228355407715, "label": 3.0}, {"pred": -3.61362361907959, "label": 3.0}, 
{"pred": -0.658549427986145, "label": 3.0}, {"pred": 3.9307382106781006, "label": 3.0}, {"pred": -1.8052740097045898, "label": 3.0}, {"pred": -2.1290013790130615, "label": 3.0}, {"pred": -0.8058973550796509, "label": 4.0}, {"pred": 5.492187023162842, "label": 4.0}, {"pred": -1.329720139503479, "label": 4.0}, {"pred": 1.065961480140686, "label": 4.0}, {"pred": -1.59515380859375, "label": 4.0}, {"pred": -3.256868362426758, "label": 4.0}, {"pred": -0.772830605506897, "label": 4.0}, {"pred": 4.408609867095947, "label": 4.0}, {"pred": 4.209987163543701, "label": 4.0}, {"pred": -1.185229778289795, "label": 4.0}, {"pred": -1.233396291732788, "label": 4.0}, {"pred": -3.5003533363342285, "label": 4.0}, {"pred": 0.6357506513595581, "label": 4.0}, {"pred": 0.5595592856407166, "label": 0.0}, {"pred": 0.1636270135641098, "label": 4.0}, {"pred": 3.622666597366333, "label": 4.0}, {"pred": 1.435227870941162, "label": 4.0}, {"pred": -2.6869592666625977, "label": 4.0}, {"pred": 5.5428595542907715, "label": 4.0}, {"pred": -1.6263177394866943, "label": 4.0}, {"pred": -1.333096981048584, "label": 0.0}, {"pred": 2.459481954574585, "label": 0.0}, {"pred": -0.9780612587928772, "label": 0.0}, {"pred": -1.7666338682174683, "label": 4.0}, {"pred": -0.1907656043767929, "label": 4.0}, {"pred": -4.156367778778076, "label": 4.0}, {"pred": -1.1733185052871704, "label": 1.0}, {"pred": 6.804201602935791, "label": 4.0}, {"pred": 0.20611877739429474, "label": 4.0}, {"pred": -1.7806588411331177, "label": 4.0}, {"pred": -0.7849682569503784, "label": 4.0}, {"pred": 0.1287001520395279, "label": 4.0}, {"pred": 6.6045684814453125, "label": 4.0}, {"pred": -0.7379199266433716, "label": 0.0}, {"pred": -1.5124040842056274, "label": 1.0}, {"pred": -2.282989501953125, "label": 4.0}, {"pred": 6.492152690887451, "label": 1.0}, {"pred": -1.6549134254455566, "label": 4.0}, {"pred": -1.3640364408493042, "label": 4.0}, {"pred": 0.6657000780105591, "label": 1.0}, {"pred": -0.20341187715530396, "label": 1.0}, {"pred": 
-2.8531274795532227, "label": 4.0}, {"pred": 5.445215702056885, "label": 4.0}, {"pred": -2.088623523712158, "label": 4.0}, {"pred": -1.371069312095642, "label": 4.0}, {"pred": 2.7984957695007324, "label": 4.0}, {"pred": -0.8653277158737183, "label": 4.0}, {"pred": -1.745413899421692, "label": 1.0}, {"pred": 0.050096578896045685, "label": 1.0}, {"pred": -2.334995746612549, "label": 2.0}, {"pred": -1.3406864404678345, "label": 1.0}, {"pred": 6.671549320220947, "label": 4.0}, {"pred": -0.8588800430297852, "label": 4.0}, {"pred": -2.07035493850708, "label": 4.0}, {"pred": 6.2443060874938965, "label": 4.0}, {"pred": -2.2285091876983643, "label": 1.0}, {"pred": -1.5247095823287964, "label": 4.0}, {"pred": 1.2969790697097778, "label": 1.0}, {"pred": -1.2332247495651245, "label": 1.0}, {"pred": -2.1368021965026855, "label": 1.0}, {"pred": 6.383357524871826, "label": 4.0}, {"pred": -2.5497217178344727, "label": 4.0}, {"pred": -1.8065745830535889, "label": 1.0}, {"pred": 1.763480544090271, "label": 4.0}, {"pred": -1.0267008543014526, "label": 4.0}, {"pred": -2.816977024078369, "label": 4.0}, {"pred": 3.4707865715026855, "label": 1.0}, {"pred": -1.6810994148254395, "label": 3.0}, {"pred": 3.311030149459839, "label": 4.0}, {"pred": 1.0374013185501099, "label": 4.0}, {"pred": -2.034620523452759, "label": 2.0}, {"pred": -1.3881349563598633, "label": 4.0}, {"pred": -0.8568321466445923, "label": 4.0}, {"pred": -3.526418685913086, "label": 0.0}, {"pred": -0.8116082549095154, "label": 0.0}, {"pred": 7.840891361236572, "label": 4.0}, {"pred": -0.7322971224784851, "label": 0.0}, {"pred": -3.2228617668151855, "label": 1.0}, {"pred": 1.3149045705795288, "label": 4.0}, {"pred": -2.74904203414917, "label": 1.0}, {"pred": -1.4809495210647583, "label": 4.0}, {"pred": 7.021670341491699, "label": 2.0}, {"pred": -0.8419994115829468, "label": 2.0}, {"pred": -1.8306918144226074, "label": 4.0}, {"pred": -0.46144601702690125, "label": 2.0}, {"pred": -3.881315231323242, "label": 4.0}, {"pred": 
-1.531129240989685, "label": 4.0}, {"pred": 7.684051990509033, "label": 4.0}, {"pred": -0.368630051612854, "label": 4.0}, {"pred": -3.746354818344116, "label": 4.0}, {"pred": 5.081518650054932, "label": 4.0}, {"pred": -1.8096411228179932, "label": 4.0}, {"pred": -0.8847378492355347, "label": 4.0}, {"pred": 3.077164888381958, "label": 4.0}, {"pred": -1.415289044380188, "label": 1.0}, {"pred": -1.0814390182495117, "label": 0.0}, {"pred": -1.6681712865829468, "label": 1.0}, {"pred": -4.204435348510742, "label": 4.0}, {"pred": -1.584557056427002, "label": 4.0}, {"pred": 6.896392822265625, "label": 1.0}, {"pred": 0.3674514591693878, "label": 4.0}, {"pred": -1.3221633434295654, "label": 4.0}, {"pred": -1.4246068000793457, "label": 4.0}, {"pred": -2.8050172328948975, "label": 0.0}, {"pred": -1.0886117219924927, "label": 4.0}, {"pred": 7.081745624542236, "label": 5.0}, {"pred": -0.7080103158950806, "label": 4.0}]
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 3.0,
|
3 |
+
"train_loss": 0.5280582647873345,
|
4 |
+
"train_runtime": 227.8039,
|
5 |
+
"train_samples": 266,
|
6 |
+
"train_samples_per_second": 3.503,
|
7 |
+
"train_steps_per_second": 3.503
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 798,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.38,
|
13 |
+
"eval_f1": 0.5937051387034437,
|
14 |
+
"eval_loss": 0.5818731188774109,
|
15 |
+
"eval_runtime": 2.6676,
|
16 |
+
"eval_samples_per_second": 12.746,
|
17 |
+
"eval_steps_per_second": 12.746,
|
18 |
+
"step": 100
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"epoch": 0.75,
|
22 |
+
"eval_f1": 0.6147189338467695,
|
23 |
+
"eval_loss": 0.5344187617301941,
|
24 |
+
"eval_runtime": 2.5934,
|
25 |
+
"eval_samples_per_second": 13.11,
|
26 |
+
"eval_steps_per_second": 13.11,
|
27 |
+
"step": 200
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 1.13,
|
31 |
+
"eval_f1": 0.616141567791633,
|
32 |
+
"eval_loss": 0.7152214050292969,
|
33 |
+
"eval_runtime": 2.6291,
|
34 |
+
"eval_samples_per_second": 12.932,
|
35 |
+
"eval_steps_per_second": 12.932,
|
36 |
+
"step": 300
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 1.5,
|
40 |
+
"eval_f1": 0.6262624949298642,
|
41 |
+
"eval_loss": 0.5542092323303223,
|
42 |
+
"eval_runtime": 2.8526,
|
43 |
+
"eval_samples_per_second": 11.919,
|
44 |
+
"eval_steps_per_second": 11.919,
|
45 |
+
"step": 400
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 1.88,
|
49 |
+
"learning_rate": 1.8671679197994987e-05,
|
50 |
+
"loss": 0.6563,
|
51 |
+
"step": 500
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.88,
|
55 |
+
"eval_f1": 0.6518478762537062,
|
56 |
+
"eval_loss": 0.5657368898391724,
|
57 |
+
"eval_runtime": 2.8114,
|
58 |
+
"eval_samples_per_second": 12.093,
|
59 |
+
"eval_steps_per_second": 12.093,
|
60 |
+
"step": 500
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"epoch": 2.26,
|
64 |
+
"eval_f1": 0.6421228870345159,
|
65 |
+
"eval_loss": 0.602083683013916,
|
66 |
+
"eval_runtime": 2.8782,
|
67 |
+
"eval_samples_per_second": 11.813,
|
68 |
+
"eval_steps_per_second": 11.813,
|
69 |
+
"step": 600
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 2.63,
|
73 |
+
"eval_f1": 0.6347334214391783,
|
74 |
+
"eval_loss": 0.6153014302253723,
|
75 |
+
"eval_runtime": 2.7323,
|
76 |
+
"eval_samples_per_second": 12.444,
|
77 |
+
"eval_steps_per_second": 12.444,
|
78 |
+
"step": 700
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 3.0,
|
82 |
+
"step": 798,
|
83 |
+
"total_flos": 1287131566841280.0,
|
84 |
+
"train_loss": 0.5280582647873345,
|
85 |
+
"train_runtime": 227.8039,
|
86 |
+
"train_samples_per_second": 3.503,
|
87 |
+
"train_steps_per_second": 3.503
|
88 |
+
}
|
89 |
+
],
|
90 |
+
"logging_steps": 500,
|
91 |
+
"max_steps": 798,
|
92 |
+
"num_train_epochs": 3,
|
93 |
+
"save_steps": 500,
|
94 |
+
"total_flos": 1287131566841280.0,
|
95 |
+
"trial_name": null,
|
96 |
+
"trial_params": null
|
97 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81ec612b3442bc00ee2d10d6e85ea302f91911db3a2aa4ad9fd82597bbc1bacd
|
3 |
+
size 4600
|