adarshxs committed on
Commit
2f1140b
1 Parent(s): 05c1d29
README.md DELETED
@@ -1,145 +0,0 @@
- ---
- tags:
- - generated_from_trainer
- model-index:
- - name: out
-   results: []
- ---
-
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
- should probably proofread and complete it, then remove this comment. -->
-
- [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
- <details><summary>See axolotl config</summary>
-
- axolotl version: `0.3.0`
- ```yaml
- base_model: ./TinyLlama-1.1B-intermediate-step-1431k-3T
-
- model_type: LlamaForCausalLM
- tokenizer_type: LlamaTokenizer
- is_llama_derived_model: true
-
- load_in_8bit: false
- load_in_4bit: false
- strict: false
-
- datasets:
-   - path: ./openhermes
-     type: alpaca
- dataset_prepared_path:
- val_set_size: 0.05
- output_dir: ./out
-
- sequence_len: 4096
- sample_packing: false
-
- adapter:
- lora_model_dir:
- lora_r:
- lora_alpha:
- lora_dropout:
- lora_target_linear:
- lora_fan_in_fan_out:
-
- wandb_project: tinyllama-openhermes
- wandb_entity: tensoic
- wandb_watch:
- wandb_name:
- wandb_log_model:
-
- gradient_accumulation_steps: 2
- micro_batch_size: 8
- num_epochs: 1
- optimizer: adamw_bnb_8bit
- adam_epsilon: 0.00001
- max_grad_norm: 1.0
- lr_scheduler: cosine
- learning_rate: 0.0002
-
- train_on_inputs: false
- group_by_length: false
- bf16: false
- fp16: true
- tf32: false
-
- gradient_checkpointing: true
- early_stopping_patience:
- resume_from_checkpoint:
- local_rank:
- logging_steps: 1
- xformers_attention: true
- flash_attention:
-
- warmup_steps: 100
- evals_per_epoch: 4
- eval_table_size:
- saves_per_epoch: 1
- debug:
- deepspeed: zero2.json
- weight_decay: 0.0
- fsdp:
- fsdp_config:
- special_tokens:
-   bos_token: "<s>"
-   eos_token: "</s>"
-   unk_token: "<unk>"
-
- ```
-
- </details><br>
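The YAML above is a complete axolotl trainer config for a full-parameter fp16 fine-tune (every `lora_*` field is empty) with a DeepSpeed `zero2.json` config. Assuming axolotl 0.3.0 and the local `./TinyLlama-1.1B-intermediate-step-1431k-3T` and `./openhermes` paths it references, a run like this is typically launched with `accelerate launch -m axolotl.cli.train <config>.yml`; the exact command used is not recorded in this commit.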
-
- # out
-
- This model is a full-parameter fine-tune of TinyLlama-1.1B-intermediate-step-1431k-3T on the OpenHermes dataset.
- It achieves the following results on the evaluation set:
- - Loss: 1.3647
-
- ## Model description
-
- TinyLlama is a 1.1B-parameter Llama-architecture model; this checkpoint applies one epoch of full-parameter instruction tuning on OpenHermes (no LoRA adapter was used; the adapter fields in the config above are empty).
-
- ## Intended uses & limitations
-
- More information needed
-
- ## Training and evaluation data
-
- Per the axolotl config above, training used the OpenHermes dataset in alpaca format, with 5% held out as the evaluation split (val_set_size: 0.05).
-
- ## Training procedure
-
- ### Training hyperparameters
-
- The following hyperparameters were used during training:
- - learning_rate: 0.0002
- - train_batch_size: 8
- - eval_batch_size: 8
- - seed: 42
- - distributed_type: multi-GPU
- - num_devices: 8
- - gradient_accumulation_steps: 2
- - total_train_batch_size: 128
- - total_eval_batch_size: 64
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-05
- - lr_scheduler_type: cosine
- - lr_scheduler_warmup_steps: 100
- - num_epochs: 1
- - mixed_precision_training: Native AMP
-
- ### Training results
-
- | Training Loss | Epoch | Step | Validation Loss |
- |:-------------:|:-----:|:----:|:---------------:|
- | 3.0006        | 0.0   | 1    | 1.6838          |
- | 0.8195        | 0.25  | 451  | 1.4620          |
- | 0.6836        | 0.5   | 902  | 1.4158          |
- | 0.6811        | 0.75  | 1353 | 1.3647          |
-
-
- ### Framework versions
-
- - Transformers 4.36.2
- - Pytorch 2.0.1+cu117
- - Datasets 2.15.0
- - Tokenizers 0.15.0
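As a sanity check on the hyperparameters above, the reported `total_train_batch_size` of 128 and `total_eval_batch_size` of 64 follow directly from the per-device batch size, gradient accumulation, and device count. A minimal sketch using only values from the card:

```python
# Effective batch sizes implied by the card's hyperparameters.
micro_batch_size = 8             # per-device train batch size ("train_batch_size" above)
gradient_accumulation_steps = 2  # from the axolotl config and the card
num_devices = 8                  # multi-GPU run per the card

# Each optimizer update accumulates gradients across steps and devices.
total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
print(total_train_batch_size)    # 128

# Evaluation does not accumulate gradients, so only devices multiply.
total_eval_batch_size = micro_batch_size * num_devices
print(total_eval_batch_size)     # 64
```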
config.json DELETED
@@ -1,28 +0,0 @@
- {
-   "_name_or_path": "./TinyLlama-1.1B-intermediate-step-1431k-3T",
-   "architectures": [
-     "LlamaForCausalLM"
-   ],
-   "attention_bias": false,
-   "attention_dropout": 0.0,
-   "bos_token_id": 1,
-   "eos_token_id": 2,
-   "hidden_act": "silu",
-   "hidden_size": 2048,
-   "initializer_range": 0.02,
-   "intermediate_size": 5632,
-   "max_position_embeddings": 4096,
-   "model_type": "llama",
-   "num_attention_heads": 32,
-   "num_hidden_layers": 22,
-   "num_key_value_heads": 4,
-   "pretraining_tp": 1,
-   "rms_norm_eps": 1e-05,
-   "rope_scaling": null,
-   "rope_theta": 10000.0,
-   "tie_word_embeddings": false,
-   "torch_dtype": "float16",
-   "transformers_version": "4.36.2",
-   "use_cache": false,
-   "vocab_size": 32000
- }
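The deleted `config.json` matches the published TinyLlama geometry. As a rough cross-check (a back-of-envelope sketch derived from the numbers above, not data from the commit), the implied parameter count is about 1.1B, which at two bytes per float16 weight is consistent with the ~2.2 GB `pytorch_model.bin` deleted below:

```python
# Approximate parameter count from the deleted config.json.
vocab_size, hidden, intermediate, n_layers = 32000, 2048, 5632, 22
n_heads, n_kv_heads = 32, 4

head_dim = hidden // n_heads    # 64
kv_dim = n_kv_heads * head_dim  # 256; num_key_value_heads < num_attention_heads, i.e. grouped-query attention

embeddings = vocab_size * hidden * 2                   # input embeddings + untied lm_head (tie_word_embeddings: false)
attention = 2 * hidden * hidden + 2 * hidden * kv_dim  # q/o projections plus the smaller k/v projections
mlp = 3 * hidden * intermediate                        # gate, up, and down projections (SiLU MLP)
total = embeddings + n_layers * (attention + mlp)      # norm weights omitted; they are negligible

print(f"{total / 1e9:.2f}B parameters")     # ~1.10B
print(f"{total * 2 / 1e9:.2f} GB in fp16")  # ~2.20 GB, matching the LFS pointer below
```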
generation_config.json DELETED
@@ -1,7 +0,0 @@
- {
-   "bos_token_id": 1,
-   "eos_token_id": 2,
-   "max_length": 2048,
-   "pad_token_id": 0,
-   "transformers_version": "4.36.2"
- }
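Before deletion this checkpoint loaded through the standard transformers API. A minimal inference sketch follows, assuming the files are restored to a local directory; the `./out` path and the alpaca-style prompt are assumptions based on the training config, not part of the commit. Note that the default `max_length` of 2048 here is more conservative than the model's `max_position_embeddings` of 4096.

```python
# Minimal inference sketch; "./out" is a hypothetical local copy of the deleted files.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "./out"  # hypothetical path, mirroring output_dir in the training config
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)

# The training data was alpaca-formatted, so an alpaca-style prompt is a reasonable guess.
prompt = "### Instruction:\nExplain gradient checkpointing in one sentence.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```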
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:cc780bb0faf671a3cb8409e7b4aab151cf6c760ad7ebe2748a189370924e3bfb
- size 2200123773
special_tokens_map.json DELETED
@@ -1,24 +0,0 @@
- {
-   "bos_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": "</s>",
-   "unk_token": {
-     "content": "<unk>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   }
- }
tokenizer.model DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
- size 499723
tokenizer_config.json DELETED
@@ -1,44 +0,0 @@
- {
-   "add_bos_token": true,
-   "add_eos_token": false,
-   "added_tokens_decoder": {
-     "0": {
-       "content": "<unk>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "1": {
-       "content": "<s>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "2": {
-       "content": "</s>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "bos_token": "<s>",
-   "clean_up_tokenization_spaces": false,
-   "eos_token": "</s>",
-   "legacy": false,
-   "model_max_length": 1000000000000000019884624838656,
-   "pad_token": "</s>",
-   "padding_side": "right",
-   "sp_model_kwargs": {},
-   "spaces_between_special_tokens": false,
-   "tokenizer_class": "LlamaTokenizer",
-   "trust_remote_code": false,
-   "unk_token": "<unk>",
-   "use_default_system_prompt": false,
-   "use_fast": true
- }
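Two details of the deleted tokenizer files matter when reconstructing this setup: padding reuses the EOS token (`</s>`) on the right, and encoding adds BOS but not EOS. A small sketch, again assuming a hypothetical local copy of the files:

```python
# Tokenizer behavior implied by special_tokens_map.json and tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./out")  # hypothetical local copy

ids = tok("hello world").input_ids
assert ids[0] == tok.bos_token_id == 1  # add_bos_token: true
assert ids[-1] != tok.eos_token_id      # add_eos_token: false

assert tok.pad_token == tok.eos_token == "</s>"  # pad_token reuses EOS
assert tok.padding_side == "right"
```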