End of training

Browse files

Files changed (5) hide show

README.md +83 -0
config.json +62 -0
generation_config.json +6 -0
model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,83 @@

+---
+license: apache-2.0
+base_model: google/flan-t5-small
+tags:
+- generated_from_trainer
+metrics:
+- rouge
+model-index:
+- name: flan-t5-base
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# flan-t5-base
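+This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset. Note that, despite the `flan-t5-base` name of this repository, the starting checkpoint is the *small* FLAN-T5 variant (`d_model` 512, 8 encoder and 8 decoder layers), not `google/flan-t5-base`.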
+It achieves the following results on the evaluation set:
+- Loss: 1.7474
+- Rouge1: 15.6258
+- Rouge2: 5.8684
+- Rougel: 13.5135
+- Rougelsum: 14.5266
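+- Gen Len: 19.0 (identical at every evaluation step, which suggests generations are being cut off at the default generation `max_length` of 20 tokens; the ROUGE scores above may therefore reflect truncated outputs — verify the generation settings before comparing against other models)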
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 5
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2 | Rougel  | Rougelsum | Gen Len |
+|:-------------:|:-----:|:----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
+| 2.3424        | 0.27  | 500  | 2.0519          | 13.8547 | 4.8819 | 12.0331 | 12.8514   | 19.0    |
+| 2.1616        | 0.53  | 1000 | 1.9535          | 14.7848 | 5.382  | 12.8365 | 13.6475   | 19.0    |
+| 2.0723        | 0.8   | 1500 | 1.9142          | 14.6906 | 5.434  | 12.8341 | 13.6491   | 19.0    |
+| 2.0202        | 1.07  | 2000 | 1.8883          | 14.8456 | 5.5148 | 12.7977 | 13.7626   | 19.0    |
+| 1.9921        | 1.33  | 2500 | 1.8473          | 14.8381 | 5.555  | 12.791  | 13.6959   | 19.0    |
+| 1.9539        | 1.6   | 3000 | 1.8293          | 15.2161 | 5.7276 | 13.1915 | 14.1315   | 19.0    |
+| 1.9455        | 1.87  | 3500 | 1.8166          | 15.2705 | 5.6751 | 13.2908 | 14.2064   | 19.0    |
+| 1.9266        | 2.13  | 4000 | 1.8018          | 15.303  | 5.7225 | 13.2318 | 14.1942   | 19.0    |
+| 1.8949        | 2.4   | 4500 | 1.7904          | 15.7181 | 6.0653 | 13.6993 | 14.5572   | 19.0    |
+| 1.906         | 2.67  | 5000 | 1.7814          | 15.7143 | 5.9897 | 13.6178 | 14.5986   | 19.0    |
+| 1.8737        | 2.93  | 5500 | 1.7706          | 15.4469 | 5.8011 | 13.3005 | 14.3128   | 19.0    |
+| 1.8779        | 3.2   | 6000 | 1.7668          | 15.6243 | 5.9534 | 13.5025 | 14.5397   | 19.0    |
+| 1.8638        | 3.47  | 6500 | 1.7629          | 15.3433 | 5.6495 | 13.251  | 14.3      | 19.0    |
+| 1.8644        | 3.73  | 7000 | 1.7559          | 15.4275 | 5.6924 | 13.2484 | 14.3135   | 19.0    |
+| 1.8389        | 4.0   | 7500 | 1.7522          | 15.5374 | 5.8713 | 13.4588 | 14.4702   | 19.0    |
+| 1.8467        | 4.27  | 8000 | 1.7507          | 15.47   | 5.7876 | 13.3985 | 14.4401   | 19.0    |
+| 1.8287        | 4.53  | 8500 | 1.7502          | 15.4761 | 5.7342 | 13.3502 | 14.4118   | 19.0    |
+| 1.8439        | 4.8   | 9000 | 1.7474          | 15.6258 | 5.8684 | 13.5135 | 14.5266   | 19.0    |
+### Framework versions
+- Transformers 4.38.2
+- Pytorch 2.2.1+cu121
+- Datasets 2.18.0
+- Tokenizers 0.15.2

config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_name_or_path": "google/flan-t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 1024,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 8,
+  "num_heads": 6,
+  "num_layers": 8,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 32128
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3894438bc3ce24b46387f75bb873f459030acea3c02d00c6a90190ad71a48c32
+size 307867048

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f06aef94fa30014cccb595d02e2a7fa4259752bf062741b237351c13e9a85cf0
+size 4984