Alex Spangher committed on
Commit 0219ce0
1 Parent(s): e3b4dcc
all_results.json ADDED
@@ -0,0 +1,14 @@
+ {
+     "epoch": 3.0,
+     "eval_samples": 34,
+     "test_f1": 0.6595240802735967,
+     "test_loss": 0.6290570497512817,
+     "test_runtime": 2.9031,
+     "test_samples_per_second": 11.712,
+     "test_steps_per_second": 11.712,
+     "train_loss": 0.5280582647873345,
+     "train_runtime": 227.8039,
+     "train_samples": 266,
+     "train_samples_per_second": 3.503,
+     "train_steps_per_second": 3.503
+ }
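For reference, a minimal sketch of how these aggregated metrics could be read back after downloading the file; the filename comes from this commit, and the script is assumed to run from the directory that holds it:

```python
import json

# Load the aggregated train/test metrics written at the end of training.
with open("all_results.json") as f:
    results = json.load(f)

print(f"test F1:    {results['test_f1']:.4f}")
print(f"test loss:  {results['test_loss']:.4f}")
print(f"train loss: {results['train_loss']:.4f}")
```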
config.json ADDED
@@ -0,0 +1,125 @@
+ {
+   "_name_or_path": "roberta-base",
+   "architectures": [
+     "SentenceClassificationModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classification_head": {
+     "num_labels": 6,
+     "pooling_method": "attention"
+   },
+   "classifier_dropout": null,
+   "context_config": {
+     "_name_or_path": "roberta-base",
+     "add_cross_attention": false,
+     "architectures": [
+       "RobertaForMaskedLM"
+     ],
+     "attention_probs_dropout_prob": 0.1,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": 0,
+     "chunk_size_feed_forward": 0,
+     "classifier_dropout": null,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 2,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.1,
+     "hidden_size": 768,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 120,
+     "min_length": 0,
+     "model_type": "roberta",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 2,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_hidden_layers": 2,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1,
+     "position_embedding_type": "absolute",
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.35.0",
+     "type_vocab_size": 1,
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "use_cache": true,
+     "vocab_size": 50265
+   },
+   "context_layer": "transformer",
+   "eos_token_id": 2,
+   "frozen_layers": [
+     0,
+     1,
+     2,
+     3,
+     4,
+     5,
+     6,
+     7,
+     8,
+     9
+   ],
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.0",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
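The config describes a repo-specific `SentenceClassificationModel`: a 12-layer RoBERTa encoder with layers 0-9 frozen, a 6-label attention-pooling classification head, and a small 2-layer `context_config` encoder. Because that class lives in the training code rather than in `transformers`, a stock `AutoModel` call will not reconstruct it; the config itself is plain JSON, though, so the custom fields can be inspected directly. A minimal sketch, assuming the file has been downloaded locally:

```python
import json

# Inspect the custom fields layered on top of the base roberta-base config.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["architectures"])         # ['SentenceClassificationModel'] -- custom class, not in transformers
print(cfg["classification_head"])   # {'num_labels': 6, 'pooling_method': 'attention'}
print(cfg["frozen_layers"])         # encoder layers 0-9 kept frozen during fine-tuning
print(cfg["context_config"]["num_hidden_layers"])  # 2-layer context encoder
```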
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0a8943cd3267c1698d67425db3b39331dd0ad2a57c5e59932707d8024fcfc07
+ size 714853440
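What is committed here is only the Git LFS pointer; the ~715 MB weight file itself is stored via LFS. A sketch of how the weights could be fetched and their tensor names listed without loading everything into memory, with `"<user>/<repo>"` as a placeholder for this model's actual repo id:

```python
from huggingface_hub import hf_hub_download
from safetensors import safe_open

# Placeholder repo id; substitute the real one for this model.
path = hf_hub_download(repo_id="<user>/<repo>", filename="model.safetensors")

# List the first few tensor names and shapes without materializing all weights.
with safe_open(path, framework="pt") as f:
    for name in list(f.keys())[:5]:
        print(name, f.get_slice(name).get_shape())
```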
post-training eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "eval_samples": 34,
+     "test_f1": 0.6595240802735967,
+     "test_loss": 0.6290570497512817,
+     "test_runtime": 2.9031,
+     "test_samples_per_second": 11.712,
+     "test_steps_per_second": 11.712
+ }
prediction_output.jsonl ADDED
@@ -0,0 +1 @@
+ [{"pred": 7.805722713470459, "label": 0.0}, {"pred": -1.3506264686584473, "label": 4.0}, {"pred": -2.723374843597412, "label": 5.0}, {"pred": -1.896144986152649, "label": 4.0}, {"pred": -0.7635705471038818, "label": 3.0}, {"pred": -2.15873646736145, "label": 4.0}, {"pred": -1.2330230474472046, "label": 4.0}, {"pred": 0.00858240481466055, "label": 4.0}, {"pred": -3.58148455619812, "label": 2.0}, {"pred": -1.1138194799423218, "label": 2.0}, {"pred": 6.225915431976318, "label": 1.0}, {"pred": -0.36288589239120483, "label": 4.0}, {"pred": -2.230883836746216, "label": 4.0}, {"pred": 0.5709546804428101, "label": 3.0}, {"pred": -3.3159286975860596, "label": 1.0}, {"pred": -1.163223147392273, "label": 1.0}, {"pred": 6.794768810272217, "label": 4.0}, {"pred": 0.06500926613807678, "label": 4.0}, {"pred": -2.9693849086761475, "label": 1.0}, {"pred": 3.4366679191589355, "label": 1.0}, {"pred": -2.013765573501587, "label": 4.0}, {"pred": -1.7807642221450806, "label": 4.0}, {"pred": 4.4118475914001465, "label": 4.0}, {"pred": -0.10153692960739136, "label": 1.0}, {"pred": -2.5499534606933594, "label": 4.0}, {"pred": 1.1678745746612549, "label": 4.0}, {"pred": -1.8997702598571777, "label": 0.0}, {"pred": 6.152190685272217, "label": 0.0}, {"pred": 0.9506238698959351, "label": 1.0}, {"pred": -1.8455047607421875, "label": 3.0}, {"pred": -2.5474693775177, "label": 3.0}, {"pred": 1.6176929473876953, "label": 4.0}, {"pred": -1.6770298480987549, "label": 4.0}, {"pred": -1.8495861291885376, "label": 4.0}, {"pred": 5.906722068786621, "label": 4.0}, {"pred": -0.9954162240028381, "label": 5.0}, {"pred": -1.280759572982788, "label": 1.0}, {"pred": -1.7175190448760986, "label": 1.0}, {"pred": -3.5523223876953125, "label": 4.0}, {"pred": -1.1674764156341553, "label": 4.0}, {"pred": 7.825597286224365, "label": 4.0}, {"pred": -0.48608145117759705, "label": 4.0}, {"pred": 2.463496685028076, "label": 4.0}, {"pred": -0.5277228355407715, "label": 3.0}, {"pred": -3.61362361907959, "label": 3.0}, {"pred": -0.658549427986145, "label": 3.0}, {"pred": 3.9307382106781006, "label": 3.0}, {"pred": -1.8052740097045898, "label": 3.0}, {"pred": -2.1290013790130615, "label": 3.0}, {"pred": -0.8058973550796509, "label": 4.0}, {"pred": 5.492187023162842, "label": 4.0}, {"pred": -1.329720139503479, "label": 4.0}, {"pred": 1.065961480140686, "label": 4.0}, {"pred": -1.59515380859375, "label": 4.0}, {"pred": -3.256868362426758, "label": 4.0}, {"pred": -0.772830605506897, "label": 4.0}, {"pred": 4.408609867095947, "label": 4.0}, {"pred": 4.209987163543701, "label": 4.0}, {"pred": -1.185229778289795, "label": 4.0}, {"pred": -1.233396291732788, "label": 4.0}, {"pred": -3.5003533363342285, "label": 4.0}, {"pred": 0.6357506513595581, "label": 4.0}, {"pred": 0.5595592856407166, "label": 0.0}, {"pred": 0.1636270135641098, "label": 4.0}, {"pred": 3.622666597366333, "label": 4.0}, {"pred": 1.435227870941162, "label": 4.0}, {"pred": -2.6869592666625977, "label": 4.0}, {"pred": 5.5428595542907715, "label": 4.0}, {"pred": -1.6263177394866943, "label": 4.0}, {"pred": -1.333096981048584, "label": 0.0}, {"pred": 2.459481954574585, "label": 0.0}, {"pred": -0.9780612587928772, "label": 0.0}, {"pred": -1.7666338682174683, "label": 4.0}, {"pred": -0.1907656043767929, "label": 4.0}, {"pred": -4.156367778778076, "label": 4.0}, {"pred": -1.1733185052871704, "label": 1.0}, {"pred": 6.804201602935791, "label": 4.0}, {"pred": 0.20611877739429474, "label": 4.0}, {"pred": -1.7806588411331177, "label": 4.0}, {"pred": -0.7849682569503784, "label": 4.0}, {"pred": 
0.1287001520395279, "label": 4.0}, {"pred": 6.6045684814453125, "label": 4.0}, {"pred": -0.7379199266433716, "label": 0.0}, {"pred": -1.5124040842056274, "label": 1.0}, {"pred": -2.282989501953125, "label": 4.0}, {"pred": 6.492152690887451, "label": 1.0}, {"pred": -1.6549134254455566, "label": 4.0}, {"pred": -1.3640364408493042, "label": 4.0}, {"pred": 0.6657000780105591, "label": 1.0}, {"pred": -0.20341187715530396, "label": 1.0}, {"pred": -2.8531274795532227, "label": 4.0}, {"pred": 5.445215702056885, "label": 4.0}, {"pred": -2.088623523712158, "label": 4.0}, {"pred": -1.371069312095642, "label": 4.0}, {"pred": 2.7984957695007324, "label": 4.0}, {"pred": -0.8653277158737183, "label": 4.0}, {"pred": -1.745413899421692, "label": 1.0}, {"pred": 0.050096578896045685, "label": 1.0}, {"pred": -2.334995746612549, "label": 2.0}, {"pred": -1.3406864404678345, "label": 1.0}, {"pred": 6.671549320220947, "label": 4.0}, {"pred": -0.8588800430297852, "label": 4.0}, {"pred": -2.07035493850708, "label": 4.0}, {"pred": 6.2443060874938965, "label": 4.0}, {"pred": -2.2285091876983643, "label": 1.0}, {"pred": -1.5247095823287964, "label": 4.0}, {"pred": 1.2969790697097778, "label": 1.0}, {"pred": -1.2332247495651245, "label": 1.0}, {"pred": -2.1368021965026855, "label": 1.0}, {"pred": 6.383357524871826, "label": 4.0}, {"pred": -2.5497217178344727, "label": 4.0}, {"pred": -1.8065745830535889, "label": 1.0}, {"pred": 1.763480544090271, "label": 4.0}, {"pred": -1.0267008543014526, "label": 4.0}, {"pred": -2.816977024078369, "label": 4.0}, {"pred": 3.4707865715026855, "label": 1.0}, {"pred": -1.6810994148254395, "label": 3.0}, {"pred": 3.311030149459839, "label": 4.0}, {"pred": 1.0374013185501099, "label": 4.0}, {"pred": -2.034620523452759, "label": 2.0}, {"pred": -1.3881349563598633, "label": 4.0}, {"pred": -0.8568321466445923, "label": 4.0}, {"pred": -3.526418685913086, "label": 0.0}, {"pred": -0.8116082549095154, "label": 0.0}, {"pred": 7.840891361236572, "label": 4.0}, {"pred": -0.7322971224784851, "label": 0.0}, {"pred": -3.2228617668151855, "label": 1.0}, {"pred": 1.3149045705795288, "label": 4.0}, {"pred": -2.74904203414917, "label": 1.0}, {"pred": -1.4809495210647583, "label": 4.0}, {"pred": 7.021670341491699, "label": 2.0}, {"pred": -0.8419994115829468, "label": 2.0}, {"pred": -1.8306918144226074, "label": 4.0}, {"pred": -0.46144601702690125, "label": 2.0}, {"pred": -3.881315231323242, "label": 4.0}, {"pred": -1.531129240989685, "label": 4.0}, {"pred": 7.684051990509033, "label": 4.0}, {"pred": -0.368630051612854, "label": 4.0}, {"pred": -3.746354818344116, "label": 4.0}, {"pred": 5.081518650054932, "label": 4.0}, {"pred": -1.8096411228179932, "label": 4.0}, {"pred": -0.8847378492355347, "label": 4.0}, {"pred": 3.077164888381958, "label": 4.0}, {"pred": -1.415289044380188, "label": 1.0}, {"pred": -1.0814390182495117, "label": 0.0}, {"pred": -1.6681712865829468, "label": 1.0}, {"pred": -4.204435348510742, "label": 4.0}, {"pred": -1.584557056427002, "label": 4.0}, {"pred": 6.896392822265625, "label": 1.0}, {"pred": 0.3674514591693878, "label": 4.0}, {"pred": -1.3221633434295654, "label": 4.0}, {"pred": -1.4246068000793457, "label": 4.0}, {"pred": -2.8050172328948975, "label": 0.0}, {"pred": -1.0886117219924927, "label": 4.0}, {"pred": 7.081745624542236, "label": 5.0}, {"pred": -0.7080103158950806, "label": 4.0}]
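Despite the `.jsonl` extension, the file appears to hold a single JSON array of `{"pred": <score>, "label": <id>}` records. A minimal sketch for loading and summarizing it; how the scalar scores map onto the six classes is defined by the training code and is not assumed here:

```python
import json

# The file contains one JSON array of per-example prediction records.
with open("prediction_output.jsonl") as f:
    records = json.load(f)

print(len(records), "predictions")
print("score range:", min(r["pred"] for r in records), "to", max(r["pred"] for r in records))
print("labels seen:", sorted({int(r["label"]) for r in records}))
```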
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 3.0,
+     "train_loss": 0.5280582647873345,
+     "train_runtime": 227.8039,
+     "train_samples": 266,
+     "train_samples_per_second": 3.503,
+     "train_steps_per_second": 3.503
+ }
trainer_state.json ADDED
@@ -0,0 +1,97 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 3.0,
+   "eval_steps": 100,
+   "global_step": 798,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.38,
+       "eval_f1": 0.5937051387034437,
+       "eval_loss": 0.5818731188774109,
+       "eval_runtime": 2.6676,
+       "eval_samples_per_second": 12.746,
+       "eval_steps_per_second": 12.746,
+       "step": 100
+     },
+     {
+       "epoch": 0.75,
+       "eval_f1": 0.6147189338467695,
+       "eval_loss": 0.5344187617301941,
+       "eval_runtime": 2.5934,
+       "eval_samples_per_second": 13.11,
+       "eval_steps_per_second": 13.11,
+       "step": 200
+     },
+     {
+       "epoch": 1.13,
+       "eval_f1": 0.616141567791633,
+       "eval_loss": 0.7152214050292969,
+       "eval_runtime": 2.6291,
+       "eval_samples_per_second": 12.932,
+       "eval_steps_per_second": 12.932,
+       "step": 300
+     },
+     {
+       "epoch": 1.5,
+       "eval_f1": 0.6262624949298642,
+       "eval_loss": 0.5542092323303223,
+       "eval_runtime": 2.8526,
+       "eval_samples_per_second": 11.919,
+       "eval_steps_per_second": 11.919,
+       "step": 400
+     },
+     {
+       "epoch": 1.88,
+       "learning_rate": 1.8671679197994987e-05,
+       "loss": 0.6563,
+       "step": 500
+     },
+     {
+       "epoch": 1.88,
+       "eval_f1": 0.6518478762537062,
+       "eval_loss": 0.5657368898391724,
+       "eval_runtime": 2.8114,
+       "eval_samples_per_second": 12.093,
+       "eval_steps_per_second": 12.093,
+       "step": 500
+     },
+     {
+       "epoch": 2.26,
+       "eval_f1": 0.6421228870345159,
+       "eval_loss": 0.602083683013916,
+       "eval_runtime": 2.8782,
+       "eval_samples_per_second": 11.813,
+       "eval_steps_per_second": 11.813,
+       "step": 600
+     },
+     {
+       "epoch": 2.63,
+       "eval_f1": 0.6347334214391783,
+       "eval_loss": 0.6153014302253723,
+       "eval_runtime": 2.7323,
+       "eval_samples_per_second": 12.444,
+       "eval_steps_per_second": 12.444,
+       "step": 700
+     },
+     {
+       "epoch": 3.0,
+       "step": 798,
+       "total_flos": 1287131566841280.0,
+       "train_loss": 0.5280582647873345,
+       "train_runtime": 227.8039,
+       "train_samples_per_second": 3.503,
+       "train_steps_per_second": 3.503
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 798,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 1287131566841280.0,
+   "trial_name": null,
+   "trial_params": null
+ }
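The `log_history` above shows eval F1 climbing from roughly 0.59 at step 100 to about 0.65 at step 500, then plateauing. A minimal sketch for pulling that curve out of the trainer state locally:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries; the step-500 entry without eval_f1 is the
# periodic training-loss log.
curve = [(e["step"], e["eval_f1"]) for e in state["log_history"] if "eval_f1" in e]
for step, f1 in curve:
    print(f"step {step:4d}  eval_f1 {f1:.4f}")
```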
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81ec612b3442bc00ee2d10d6e85ea302f91911db3a2aa4ad9fd82597bbc1bacd
+ size 4600
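This small LFS-tracked file is the `TrainingArguments` object that the Trainer serializes with `torch.save`, so the exact hyperparameters behind the runs above can be recovered by unpickling it. A sketch, assuming a compatible `transformers` version is installed and the file's source is trusted:

```python
import torch

# training_args.bin is a pickled TrainingArguments object, so unpickling it
# requires transformers to be importable and the file to come from a trusted source.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```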