HugoVoxx/Gemma-2-2b-it-ag
Files changed:

- README.md +37 -48
- adapter_config.json +4 -4
- adapter_model.safetensors +2 -2
- special_tokens_map.json +23 -17
- tokenizer.json +2 -2
- tokenizer_config.json +7 -23
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,68 +1,57 @@
 ---
 base_model: google/gemma-2-2b-it
-library_name:
+library_name: transformers
+model_name: Gemma-2-2b-it-ag
 tags:
+- generated_from_trainer
 - trl
 - sft
-model-index:
-- name: Gemma-2-2b-it-ag
-  results: []
+licence: license
 ---

-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
-
-# Gemma-2-2b-it-ag
+# Model Card for Gemma-2-2b-it-ag

-This model is a fine-tuned version of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.0122
+This model is a fine-tuned version of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it).
+It has been trained using [TRL](https://github.com/huggingface/trl).

-## Model description
+## Quick start

-More information needed
+```python
+from transformers import pipeline

-## Intended uses & limitations
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="HugoVoxx/Gemma-2-2b-it-ag", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```

-More information needed
-
-## Training and evaluation data
-
-More information needed
-
 ## Training procedure

+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/hugovoxx-fpt-university/Fine-tune%20Gemma-2-2b-it%20on%20AlphaGeometry%20Dataset/runs/xdfyig16)
+
+This model was trained with SFT.
+
-### Training hyperparameters
-
-The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 1
-- eval_batch_size: 1
-- seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 2
-- optimizer: Use paged_adamw_32bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
-- lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 10
-- num_epochs: 1
-
-### Training results
-
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 0.0122        | 0.1999 | 341  | 0.0122          |
-| 0.0122        | 0.3999 | 682  | 0.0122          |
-| 0.0122        | 0.5998 | 1023 | 0.0122          |
-| 0.0122        | 0.7998 | 1364 | 0.0122          |
-| 0.0122        | 0.9997 | 1705 | 0.0122          |
-
 ### Framework versions

+- TRL: 0.12.0
+- Transformers: 4.46.1
+- Pytorch: 2.4.0
+- Datasets: 3.1.0
+- Tokenizers: 0.20.0
+
+## Citations
+
+Cite TRL as:
+
+```bibtex
+@misc{vonwerra2022trl,
+    title = {{TRL: Transformer Reinforcement Learning}},
+    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+    year = 2020,
+    journal = {GitHub repository},
+    publisher = {GitHub},
+    howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```
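The hyperparameters removed from the card still document how the adapter was trained. As a minimal sketch (not the author's actual script), they map onto TRL's `SFTConfig` roughly as follows; `output_dir` is a placeholder, and the dataset/trainer wiring is omitted since the card never names the training data:

```python
from trl import SFTConfig

# Sketch only: values copied from the removed "Training hyperparameters" list.
# output_dir is a placeholder, not taken from the repo.
training_args = SFTConfig(
    output_dir="gemma-2-2b-it-ag",
    learning_rate=2e-4,             # learning_rate: 0.0002
    per_device_train_batch_size=1,  # train_batch_size: 1
    per_device_eval_batch_size=1,   # eval_batch_size: 1
    seed=42,
    gradient_accumulation_steps=2,  # total_train_batch_size: 2 (1 device x 2 steps)
    optim="paged_adamw_32bit",      # paged AdamW, betas=(0.9, 0.999), eps=1e-8
    lr_scheduler_type="linear",
    warmup_steps=10,                # lr_scheduler_warmup_steps: 10
    num_train_epochs=1,
)
```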
adapter_config.json
CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "up_proj",
     "v_proj",
+    "k_proj",
     "q_proj",
-    "down_proj",
-    "o_proj",
-    "up_proj",
     "gate_proj",
-    "
+    "down_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
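After this change, `target_modules` covers every attention and MLP projection in Gemma-2. A minimal PEFT sketch that yields the same module set; `r` and `lora_alpha` are placeholder values, since the real ones sit outside the displayed hunk:

```python
from peft import LoraConfig

# Placeholder rank/alpha: the actual values are not shown in this hunk.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
    task_type="CAUSAL_LM",
)
```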
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:42e5fe428e704cc8508a60c08a3a822e1c6a424a9bb6b4d3701123b4f8e26d11
+size 664584480
special_tokens_map.json
CHANGED
@@ -1,23 +1,29 @@
 {
   "additional_special_tokens": [
-    {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
-    },
-    {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
-    }
+    "<start_of_turn>",
+    "<end_of_turn>"
   ],
-  "bos_token":
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
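The updated map swaps the ChatML pair for Gemma's native control tokens. A quick sanity check of what a fresh load of the repo should report, as a sketch:

```python
from transformers import AutoTokenizer

# Load the updated tokenizer and inspect its special tokens.
tokenizer = AutoTokenizer.from_pretrained("HugoVoxx/Gemma-2-2b-it-ag")
print(tokenizer.special_tokens_map)         # expects bos <bos>, eos <eos>, pad <pad>, unk <unk>
print(tokenizer.additional_special_tokens)  # expects ['<start_of_turn>', '<end_of_turn>']
```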
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e6ce83119bb404f7f0a6e621b76759d476357dcd01241a90f9ca136ae2b3c11c
+size 34362972
tokenizer_config.json
CHANGED
@@ -1993,34 +1993,18 @@
       "rstrip": false,
       "single_word": false,
       "special": false
-    },
-    "256000": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "256001": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
+    "<start_of_turn>",
+    "<end_of_turn>"
   ],
-  "bos_token": "
-  "chat_template": "{% for message in messages %}{{'
+  "bos_token": "<bos>",
+  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "
-  "model_max_length":
-  "pad_token": "
+  "eos_token": "<eos>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "GemmaTokenizer",
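The restored template is the stock Gemma-2 chat format: each turn is wrapped in `<start_of_turn>`/`<end_of_turn>`, the assistant role is renamed `model`, and system messages raise an exception. A small sketch of how it renders, assuming the tokenizer from this repo; the expected output is inferred from the template string above:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HugoVoxx/Gemma-2-2b-it-ag")
messages = [{"role": "user", "content": "Hello!"}]
# Render the conversation without tokenizing, appending the model turn header.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Expected shape, per the template above:
# <bos><start_of_turn>user
# Hello!<end_of_turn>
# <start_of_turn>model
```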
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7ffc68c09366bc5c66183fe5319583650d3e138420f72da3a8736c2a28a9be7e
 size 5496