Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

README.md +202 -0
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +42 -0
trainer_state.json +411 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+---
+# Model Card for a LoRA Adapter of TinyLlama/TinyLlama-1.1B-Chat-v1.0
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** TinyLlama/TinyLlama-1.1B-Chat-v1.0
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
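+Evaluation loss decreased from 1.4503 after epoch 1 to 1.3499 after epoch 7 (step 2555), the best value recorded in `trainer_state.json`. This checkpoint was saved at step 2555 of a run configured for 15 epochs (5475 steps).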
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
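+LoRA adapter (`r=8`, `lora_alpha=128`, `lora_dropout=0.05`, no bias) applied to the `q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, and `down_proj` modules of the base model, with task type `CAUSAL_LM`. See `adapter_config.json` for the full configuration.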
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "gate_proj",
+    "down_proj",
+    "v_proj",
+    "up_proj",
+    "q_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b986e5628fcddc93d2bcf9557cf4b2a398a48c0b28e5ff6ceb1355b75266c3a
+size 25271744

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16e3fbc11dc1ad61f97ecee0cef2cf2c6824b8eb254e1febfa99799c70de2255
+size 50721146

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d262bf1ffd81b0f22b3ae9262caa3482bf809e7a541597c7cf604f673ce136e0
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aec8bbdcc936a90bf199fe8ea45ad0374eda8f82ad8cbe5bcb1b55932d77a3a9
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 2048,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,411 @@

+{
+  "best_metric": 1.349927544593811,
+  "best_model_checkpoint": "checkpoints/sft_2_1_1/checkpoint-2555",
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 2555,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.1506849315068493,
+      "grad_norm": 18.4865665435791,
+      "learning_rate": 5.018248175182482e-07,
+      "loss": 2.5927,
+      "step": 55
+    },
+    {
+      "epoch": 0.3013698630136986,
+      "grad_norm": 16.606660842895508,
+      "learning_rate": 1.0036496350364965e-06,
+      "loss": 2.3833,
+      "step": 110
+    },
+    {
+      "epoch": 0.4520547945205479,
+      "grad_norm": 6.788235187530518,
+      "learning_rate": 1.5054744525547446e-06,
+      "loss": 1.8868,
+      "step": 165
+    },
+    {
+      "epoch": 0.6027397260273972,
+      "grad_norm": 3.3164093494415283,
+      "learning_rate": 2.007299270072993e-06,
+      "loss": 1.5665,
+      "step": 220
+    },
+    {
+      "epoch": 0.7534246575342466,
+      "grad_norm": 3.4226760864257812,
+      "learning_rate": 2.509124087591241e-06,
+      "loss": 1.4994,
+      "step": 275
+    },
+    {
+      "epoch": 0.9041095890410958,
+      "grad_norm": 3.687007427215576,
+      "learning_rate": 3.0109489051094893e-06,
+      "loss": 1.4708,
+      "step": 330
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.4502822160720825,
+      "eval_runtime": 41.7137,
+      "eval_samples_per_second": 23.973,
+      "eval_steps_per_second": 2.997,
+      "step": 365
+    },
+    {
+      "epoch": 1.0547945205479452,
+      "grad_norm": 3.667193651199341,
+      "learning_rate": 3.5127737226277376e-06,
+      "loss": 1.4589,
+      "step": 385
+    },
+    {
+      "epoch": 1.2054794520547945,
+      "grad_norm": 3.444368362426758,
+      "learning_rate": 4.014598540145986e-06,
+      "loss": 1.4383,
+      "step": 440
+    },
+    {
+      "epoch": 1.356164383561644,
+      "grad_norm": 3.4761803150177,
+      "learning_rate": 4.516423357664234e-06,
+      "loss": 1.4421,
+      "step": 495
+    },
+    {
+      "epoch": 1.5068493150684932,
+      "grad_norm": 3.8773984909057617,
+      "learning_rate": 4.9999979671535945e-06,
+      "loss": 1.4388,
+      "step": 550
+    },
+    {
+      "epoch": 1.6575342465753424,
+      "grad_norm": 3.5462825298309326,
+      "learning_rate": 4.998349002034396e-06,
+      "loss": 1.4198,
+      "step": 605
+    },
+    {
+      "epoch": 1.808219178082192,
+      "grad_norm": 3.9237027168273926,
+      "learning_rate": 4.993627701726671e-06,
+      "loss": 1.4052,
+      "step": 660
+    },
+    {
+      "epoch": 1.958904109589041,
+      "grad_norm": 3.995187997817993,
+      "learning_rate": 4.9858398722315225e-06,
+      "loss": 1.4121,
+      "step": 715
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.4027259349822998,
+      "eval_runtime": 41.7142,
+      "eval_samples_per_second": 23.973,
+      "eval_steps_per_second": 2.997,
+      "step": 730
+    },
+    {
+      "epoch": 2.1095890410958904,
+      "grad_norm": 3.973104238510132,
+      "learning_rate": 4.974995090602673e-06,
+      "loss": 1.4018,
+      "step": 770
+    },
+    {
+      "epoch": 2.26027397260274,
+      "grad_norm": 4.114542484283447,
+      "learning_rate": 4.9611066931691045e-06,
+      "loss": 1.3977,
+      "step": 825
+    },
+    {
+      "epoch": 2.410958904109589,
+      "grad_norm": 4.350598335266113,
+      "learning_rate": 4.94419175913477e-06,
+      "loss": 1.3778,
+      "step": 880
+    },
+    {
+      "epoch": 2.5616438356164384,
+      "grad_norm": 3.951005697250366,
+      "learning_rate": 4.9242710895755e-06,
+      "loss": 1.372,
+      "step": 935
+    },
+    {
+      "epoch": 2.712328767123288,
+      "grad_norm": 4.071479797363281,
+      "learning_rate": 4.9013691818589635e-06,
+      "loss": 1.3826,
+      "step": 990
+    },
+    {
+      "epoch": 2.863013698630137,
+      "grad_norm": 3.968268632888794,
+      "learning_rate": 4.87551419951912e-06,
+      "loss": 1.3845,
+      "step": 1045
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.3834009170532227,
+      "eval_runtime": 41.778,
+      "eval_samples_per_second": 23.936,
+      "eval_steps_per_second": 2.992,
+      "step": 1095
+    },
+    {
+      "epoch": 3.0136986301369864,
+      "grad_norm": 4.093992233276367,
+      "learning_rate": 4.8467379376222215e-06,
+      "loss": 1.3736,
+      "step": 1100
+    },
+    {
+      "epoch": 3.1643835616438354,
+      "grad_norm": 4.021303176879883,
+      "learning_rate": 4.815075783666952e-06,
+      "loss": 1.3547,
+      "step": 1155
+    },
+    {
+      "epoch": 3.315068493150685,
+      "grad_norm": 4.797937393188477,
+      "learning_rate": 4.780566674066782e-06,
+      "loss": 1.3671,
+      "step": 1210
+    },
+    {
+      "epoch": 3.4657534246575343,
+      "grad_norm": 4.535392761230469,
+      "learning_rate": 4.743253046268069e-06,
+      "loss": 1.3545,
+      "step": 1265
+    },
+    {
+      "epoch": 3.616438356164384,
+      "grad_norm": 4.504812717437744,
+      "learning_rate": 4.703180786562761e-06,
+      "loss": 1.3623,
+      "step": 1320
+    },
+    {
+      "epoch": 3.767123287671233,
+      "grad_norm": 4.607705116271973,
+      "learning_rate": 4.660399173659908e-06,
+      "loss": 1.3487,
+      "step": 1375
+    },
+    {
+      "epoch": 3.9178082191780823,
+      "grad_norm": 4.659298896789551,
+      "learning_rate": 4.6149608180853545e-06,
+      "loss": 1.3502,
+      "step": 1430
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.3703773021697998,
+      "eval_runtime": 41.7996,
+      "eval_samples_per_second": 23.924,
+      "eval_steps_per_second": 2.99,
+      "step": 1460
+    },
+    {
+      "epoch": 4.068493150684931,
+      "grad_norm": 4.691000461578369,
+      "learning_rate": 4.566921597484149e-06,
+      "loss": 1.3453,
+      "step": 1485
+    },
+    {
+      "epoch": 4.219178082191781,
+      "grad_norm": 4.80633020401001,
+      "learning_rate": 4.51634058790522e-06,
+      "loss": 1.3329,
+      "step": 1540
+    },
+    {
+      "epoch": 4.36986301369863,
+      "grad_norm": 5.040696144104004,
+      "learning_rate": 4.463279991152828e-06,
+      "loss": 1.3329,
+      "step": 1595
+    },
+    {
+      "epoch": 4.52054794520548,
+      "grad_norm": 5.084527015686035,
+      "learning_rate": 4.407805058294135e-06,
+      "loss": 1.3453,
+      "step": 1650
+    },
+    {
+      "epoch": 4.671232876712329,
+      "grad_norm": 5.078038692474365,
+      "learning_rate": 4.349984009416952e-06,
+      "loss": 1.3266,
+      "step": 1705
+    },
+    {
+      "epoch": 4.821917808219178,
+      "grad_norm": 5.201215744018555,
+      "learning_rate": 4.289887949736347e-06,
+      "loss": 1.3281,
+      "step": 1760
+    },
+    {
+      "epoch": 4.972602739726027,
+      "grad_norm": 4.974658966064453,
+      "learning_rate": 4.227590782153277e-06,
+      "loss": 1.3168,
+      "step": 1815
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 1.3636702299118042,
+      "eval_runtime": 41.8147,
+      "eval_samples_per_second": 23.915,
+      "eval_steps_per_second": 2.989,
+      "step": 1825
+    },
+    {
+      "epoch": 5.123287671232877,
+      "grad_norm": 5.115445137023926,
+      "learning_rate": 4.16316911637277e-06,
+      "loss": 1.3135,
+      "step": 1870
+    },
+    {
+      "epoch": 5.273972602739726,
+      "grad_norm": 5.82274055480957,
+      "learning_rate": 4.0967021746934436e-06,
+      "loss": 1.3107,
+      "step": 1925
+    },
+    {
+      "epoch": 5.424657534246576,
+      "grad_norm": 5.606359481811523,
+      "learning_rate": 4.02827169458417e-06,
+      "loss": 1.301,
+      "step": 1980
+    },
+    {
+      "epoch": 5.575342465753424,
+      "grad_norm": 5.442434787750244,
+      "learning_rate": 3.957961828167748e-06,
+      "loss": 1.3171,
+      "step": 2035
+    },
+    {
+      "epoch": 5.726027397260274,
+      "grad_norm": 5.444327354431152,
+      "learning_rate": 3.885859038735141e-06,
+      "loss": 1.3045,
+      "step": 2090
+    },
+    {
+      "epoch": 5.876712328767123,
+      "grad_norm": 5.671774864196777,
+      "learning_rate": 3.8120519944175767e-06,
+      "loss": 1.3036,
+      "step": 2145
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 1.353081464767456,
+      "eval_runtime": 41.6872,
+      "eval_samples_per_second": 23.988,
+      "eval_steps_per_second": 2.999,
+      "step": 2190
+    },
+    {
+      "epoch": 6.027397260273973,
+      "grad_norm": 5.856392860412598,
+      "learning_rate": 3.7366314591472484e-06,
+      "loss": 1.2882,
+      "step": 2200
+    },
+    {
+      "epoch": 6.178082191780822,
+      "grad_norm": 6.328695774078369,
+      "learning_rate": 3.659690181040717e-06,
+      "loss": 1.2881,
+      "step": 2255
+    },
+    {
+      "epoch": 6.328767123287671,
+      "grad_norm": 6.592623710632324,
+      "learning_rate": 3.5813227783422654e-06,
+      "loss": 1.278,
+      "step": 2310
+    },
+    {
+      "epoch": 6.47945205479452,
+      "grad_norm": 6.272197723388672,
+      "learning_rate": 3.5016256230674704e-06,
+      "loss": 1.2799,
+      "step": 2365
+    },
+    {
+      "epoch": 6.63013698630137,
+      "grad_norm": 6.509876251220703,
+      "learning_rate": 3.4206967224900885e-06,
+      "loss": 1.2866,
+      "step": 2420
+    },
+    {
+      "epoch": 6.780821917808219,
+      "grad_norm": 6.4894304275512695,
+      "learning_rate": 3.338635598617975e-06,
+      "loss": 1.2952,
+      "step": 2475
+    },
+    {
+      "epoch": 6.931506849315069,
+      "grad_norm": 6.477168560028076,
+      "learning_rate": 3.2555431658062837e-06,
+      "loss": 1.2752,
+      "step": 2530
+    },
+    {
+      "epoch": 7.0,
+      "eval_loss": 1.349927544593811,
+      "eval_runtime": 41.6959,
+      "eval_samples_per_second": 23.983,
+      "eval_steps_per_second": 2.998,
+      "step": 2555
+    }
+  ],
+  "logging_steps": 55,
+  "max_steps": 5475,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.363484660255949e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5dc90d64b5e632efd749360fafda91c1dfba1232b2e429506f0ee2d83ac7dffd
+size 5432