Wonder-Griffin committed
Commit dd1a04c • 1 parent: 0ee3323
Update README.md

README.md CHANGED
@@ -13,40 +13,4 @@ language:
 - en
 pipeline_tag: text-generation
 ---
-
-
-## Shorsey-T2000
-
-The Chirpinator
-
-## Model description
-
-More information needed
-
-## Intended uses & limitations
-
-More information needed
-
-## Training and evaluation data
-
-More information needed
-
-## Training procedure
-
-### Training hyperparameters
-
-The following hyperparameters were used during training:
-- learning_rate: 5e-05
-- train_batch_size: 8
-- eval_batch_size: 8
-- seed: 42
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
-
-
-### Framework versions
-
-- Transformers 4.44.2
-- Pytorch 2.4.0+cu124
-- Datasets 2.20.0
-- Tokenizers 0.19.1
+
SafetensorsRepoMetadata(metadata=None, sharded=False, weight_map={'casual_lm_head.bias': 'model.safetensors', 'casual_lm_head.weight': 'model.safetensors', 'embedding.weight': 'model.safetensors', 'general_head.bias': 'model.safetensors', 'general_head.weight': 'model.safetensors', 'pos_encoding.pe': 'model.safetensors', 'qa_head.bias': 'model.safetensors', 'qa_head.weight': 'model.safetensors', 'rnn.bias_hh_l0': 'model.safetensors', 'rnn.bias_hh_l0_reverse': 'model.safetensors', 'rnn.bias_ih_l0': 'model.safetensors', 'rnn.bias_ih_l0_reverse': 'model.safetensors', 'rnn.weight_hh_l0': 'model.safetensors', 'rnn.weight_hh_l0_reverse': 'model.safetensors', 'rnn.weight_ih_l0': 'model.safetensors', 'rnn.weight_ih_l0_reverse': 'model.safetensors', 'transformer_blocks.0.attention.multihead_attn.in_proj_bias': 'model.safetensors', 'transformer_blocks.0.attention.multihead_attn.in_proj_weight': 'model.safetensors', 'transformer_blocks.0.attention.multihead_attn.out_proj.bias': 'model.safetensors', 'transformer_blocks.0.attention.multihead_attn.out_proj.weight': 'model.safetensors', 'transformer_blocks.0.feed_forward.fc1.bias': 'model.safetensors', 'transformer_blocks.0.feed_forward.fc1.weight': 'model.safetensors', 'transformer_blocks.0.feed_forward.fc2.bias': 'model.safetensors', 'transformer_blocks.0.feed_forward.fc2.weight': 'model.safetensors', 'transformer_blocks.0.layernorm1.bias': 'model.safetensors', 'transformer_blocks.0.layernorm1.weight': 'model.safetensors', 'transformer_blocks.0.layernorm2.bias': 'model.safetensors', 'transformer_blocks.0.layernorm2.weight': 'model.safetensors', 'transformer_blocks.1.attention.multihead_attn.in_proj_bias': 'model.safetensors', 'transformer_blocks.1.attention.multihead_attn.in_proj_weight': 'model.safetensors', 'transformer_blocks.1.attention.multihead_attn.out_proj.bias': 'model.safetensors', 'transformer_blocks.1.attention.multihead_attn.out_proj.weight': 'model.safetensors', 'transformer_blocks.1.feed_forward.fc1.bias': 'model.safetensors', 'transformer_blocks.1.feed_forward.fc1.weight': 'model.safetensors', 'transformer_blocks.1.feed_forward.fc2.bias': 'model.safetensors', 'transformer_blocks.1.feed_forward.fc2.weight': 'model.safetensors', 'transformer_blocks.1.layernorm1.bias': 'model.safetensors', 'transformer_blocks.1.layernorm1.weight': 'model.safetensors', 'transformer_blocks.1.layernorm2.bias': 'model.safetensors', 'transformer_blocks.1.layernorm2.weight': 'model.safetensors', 'transformer_blocks.2.attention.multihead_attn.in_proj_bias': 'model.safetensors', 'transformer_blocks.2.attention.multihead_attn.in_proj_weight': 'model.safetensors', 'transformer_blocks.2.attention.multihead_attn.out_proj.bias': 'model.safetensors', 'transformer_blocks.2.attention.multihead_attn.out_proj.weight': 'model.safetensors', 'transformer_blocks.2.feed_forward.fc1.bias': 'model.safetensors', 'transformer_blocks.2.feed_forward.fc1.weight': 'model.safetensors', 'transformer_blocks.2.feed_forward.fc2.bias': 'model.safetensors', 'transformer_blocks.2.feed_forward.fc2.weight': 'model.safetensors', 'transformer_blocks.2.layernorm1.bias': 'model.safetensors', 'transformer_blocks.2.layernorm1.weight': 'model.safetensors', 'transformer_blocks.2.layernorm2.bias': 'model.safetensors', 'transformer_blocks.2.layernorm2.weight': 'model.safetensors', 'transformer_blocks.3.attention.multihead_attn.in_proj_bias': 'model.safetensors', 'transformer_blocks.3.attention.multihead_attn.in_proj_weight': 'model.safetensors', 
'transformer_blocks.3.attention.multihead_attn.out_proj.bias': 'model.safetensors', 'transformer_blocks.3.attention.multihead_attn.out_proj.weight': 'model.safetensors', 'transformer_blocks.3.feed_forward.fc1.bias': 'model.safetensors', 'transformer_blocks.3.feed_forward.fc1.weight': 'model.safetensors', 'transformer_blocks.3.feed_forward.fc2.bias': 'model.safetensors', 'transformer_blocks.3.feed_forward.fc2.weight': 'model.safetensors', 'transformer_blocks.3.layernorm1.bias': 'model.safetensors', 'transformer_blocks.3.layernorm1.weight': 'model.safetensors', 'transformer_blocks.3.layernorm2.bias': 'model.safetensors', 'transformer_blocks.3.layernorm2.weight': 'model.safetensors'}, files_metadata={'model.safetensors': SafetensorsFileMetadata(metadata={'format': 'pt'}, tensors={'casual_lm_head.bias': TensorInfo(dtype='F32', shape=[60000], data_offsets=(0, 240000), parameter_count=60000), 'casual_lm_head.weight': TensorInfo(dtype='F32', shape=[60000, 1024], data_offsets=(240000, 246000000), parameter_count=61440000), 'embedding.weight': TensorInfo(dtype='F32', shape=[60000, 512], data_offsets=(246000000, 368880000), parameter_count=30720000), 'general_head.bias': TensorInfo(dtype='F32', shape=[60000], data_offsets=(368880000, 369120000), parameter_count=60000), 'general_head.weight': TensorInfo(dtype='F32', shape=[60000, 1024], data_offsets=(369120000, 614880000), parameter_count=61440000), 'pos_encoding.pe': TensorInfo(dtype='F32', shape=[1, 512, 512], data_offsets=(614880000, 615928576), parameter_count=262144), 'qa_head.bias': TensorInfo(dtype='F32', shape=[5], data_offsets=(615928576, 615928596), parameter_count=5), 'qa_head.weight': TensorInfo(dtype='F32', shape=[5, 1024], data_offsets=(615928596, 615949076), parameter_count=5120), 'rnn.bias_hh_l0': TensorInfo(dtype='F32', shape=[2048], data_offsets=(615949076, 615957268), parameter_count=2048), 'rnn.bias_hh_l0_reverse': TensorInfo(dtype='F32', shape=[2048], data_offsets=(615957268, 615965460), parameter_count=2048), 'rnn.bias_ih_l0': TensorInfo(dtype='F32', shape=[2048], data_offsets=(615965460, 615973652), parameter_count=2048), 'rnn.bias_ih_l0_reverse': TensorInfo(dtype='F32', shape=[2048], data_offsets=(615973652, 615981844), parameter_count=2048), 'rnn.weight_hh_l0': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(615981844, 620176148), parameter_count=1048576), 'rnn.weight_hh_l0_reverse': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(620176148, 624370452), parameter_count=1048576), 'rnn.weight_ih_l0': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(624370452, 628564756), parameter_count=1048576), 'rnn.weight_ih_l0_reverse': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(628564756, 632759060), parameter_count=1048576), 'transformer_blocks.0.attention.multihead_attn.in_proj_bias': TensorInfo(dtype='F32', shape=[1536], data_offsets=(632759060, 632765204), parameter_count=1536), 'transformer_blocks.0.attention.multihead_attn.in_proj_weight': TensorInfo(dtype='F32', shape=[1536, 512], data_offsets=(632765204, 635910932), parameter_count=786432), 'transformer_blocks.0.attention.multihead_attn.out_proj.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(635910932, 635912980), parameter_count=512), 'transformer_blocks.0.attention.multihead_attn.out_proj.weight': TensorInfo(dtype='F32', shape=[512, 512], data_offsets=(635912980, 636961556), parameter_count=262144), 'transformer_blocks.0.feed_forward.fc1.bias': TensorInfo(dtype='F32', shape=[2048], data_offsets=(636961556, 636969748), 
parameter_count=2048), 'transformer_blocks.0.feed_forward.fc1.weight': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(636969748, 641164052), parameter_count=1048576), 'transformer_blocks.0.feed_forward.fc2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(641164052, 641166100), parameter_count=512), 'transformer_blocks.0.feed_forward.fc2.weight': TensorInfo(dtype='F32', shape=[512, 2048], data_offsets=(641166100, 645360404), parameter_count=1048576), 'transformer_blocks.0.layernorm1.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(645360404, 645362452), parameter_count=512), 'transformer_blocks.0.layernorm1.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(645362452, 645364500), parameter_count=512), 'transformer_blocks.0.layernorm2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(645364500, 645366548), parameter_count=512), 'transformer_blocks.0.layernorm2.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(645366548, 645368596), parameter_count=512), 'transformer_blocks.1.attention.multihead_attn.in_proj_bias': TensorInfo(dtype='F32', shape=[1536], data_offsets=(645368596, 645374740), parameter_count=1536), 'transformer_blocks.1.attention.multihead_attn.in_proj_weight': TensorInfo(dtype='F32', shape=[1536, 512], data_offsets=(645374740, 648520468), parameter_count=786432), 'transformer_blocks.1.attention.multihead_attn.out_proj.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(648520468, 648522516), parameter_count=512), 'transformer_blocks.1.attention.multihead_attn.out_proj.weight': TensorInfo(dtype='F32', shape=[512, 512], data_offsets=(648522516, 649571092), parameter_count=262144), 'transformer_blocks.1.feed_forward.fc1.bias': TensorInfo(dtype='F32', shape=[2048], data_offsets=(649571092, 649579284), parameter_count=2048), 'transformer_blocks.1.feed_forward.fc1.weight': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(649579284, 653773588), parameter_count=1048576), 'transformer_blocks.1.feed_forward.fc2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(653773588, 653775636), parameter_count=512), 'transformer_blocks.1.feed_forward.fc2.weight': TensorInfo(dtype='F32', shape=[512, 2048], data_offsets=(653775636, 657969940), parameter_count=1048576), 'transformer_blocks.1.layernorm1.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(657969940, 657971988), parameter_count=512), 'transformer_blocks.1.layernorm1.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(657971988, 657974036), parameter_count=512), 'transformer_blocks.1.layernorm2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(657974036, 657976084), parameter_count=512), 'transformer_blocks.1.layernorm2.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(657976084, 657978132), parameter_count=512), 'transformer_blocks.2.attention.multihead_attn.in_proj_bias': TensorInfo(dtype='F32', shape=[1536], data_offsets=(657978132, 657984276), parameter_count=1536), 'transformer_blocks.2.attention.multihead_attn.in_proj_weight': TensorInfo(dtype='F32', shape=[1536, 512], data_offsets=(657984276, 661130004), parameter_count=786432), 'transformer_blocks.2.attention.multihead_attn.out_proj.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(661130004, 661132052), parameter_count=512), 'transformer_blocks.2.attention.multihead_attn.out_proj.weight': TensorInfo(dtype='F32', shape=[512, 512], data_offsets=(661132052, 662180628), parameter_count=262144), 'transformer_blocks.2.feed_forward.fc1.bias': TensorInfo(dtype='F32', 
shape=[2048], data_offsets=(662180628, 662188820), parameter_count=2048), 'transformer_blocks.2.feed_forward.fc1.weight': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(662188820, 666383124), parameter_count=1048576), 'transformer_blocks.2.feed_forward.fc2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(666383124, 666385172), parameter_count=512), 'transformer_blocks.2.feed_forward.fc2.weight': TensorInfo(dtype='F32', shape=[512, 2048], data_offsets=(666385172, 670579476), parameter_count=1048576), 'transformer_blocks.2.layernorm1.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(670579476, 670581524), parameter_count=512), 'transformer_blocks.2.layernorm1.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(670581524, 670583572), parameter_count=512), 'transformer_blocks.2.layernorm2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(670583572, 670585620), parameter_count=512), 'transformer_blocks.2.layernorm2.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(670585620, 670587668), parameter_count=512), 'transformer_blocks.3.attention.multihead_attn.in_proj_bias': TensorInfo(dtype='F32', shape=[1536], data_offsets=(670587668, 670593812), parameter_count=1536), 'transformer_blocks.3.attention.multihead_attn.in_proj_weight': TensorInfo(dtype='F32', shape=[1536, 512], data_offsets=(670593812, 673739540), parameter_count=786432), 'transformer_blocks.3.attention.multihead_attn.out_proj.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(673739540, 673741588), parameter_count=512), 'transformer_blocks.3.attention.multihead_attn.out_proj.weight': TensorInfo(dtype='F32', shape=[512, 512], data_offsets=(673741588, 674790164), parameter_count=262144), 'transformer_blocks.3.feed_forward.fc1.bias': TensorInfo(dtype='F32', shape=[2048], data_offsets=(674790164, 674798356), parameter_count=2048), 'transformer_blocks.3.feed_forward.fc1.weight': TensorInfo(dtype='F32', shape=[2048, 512], data_offsets=(674798356, 678992660), parameter_count=1048576), 'transformer_blocks.3.feed_forward.fc2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(678992660, 678994708), parameter_count=512), 'transformer_blocks.3.feed_forward.fc2.weight': TensorInfo(dtype='F32', shape=[512, 2048], data_offsets=(678994708, 683189012), parameter_count=1048576), 'transformer_blocks.3.layernorm1.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(683189012, 683191060), parameter_count=512), 'transformer_blocks.3.layernorm1.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(683191060, 683193108), parameter_count=512), 'transformer_blocks.3.layernorm2.bias': TensorInfo(dtype='F32', shape=[512], data_offsets=(683193108, 683195156), parameter_count=512), 'transformer_blocks.3.layernorm2.weight': TensorInfo(dtype='F32', shape=[512], data_offsets=(683195156, 683197204), parameter_count=512)}, parameter_count={'F32': 170799301})}, parameter_count={'F32': 170799301})
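
The block added above is not prose: it reads as the repr of the repository's safetensors metadata. A minimal sketch of how such a report can be reproduced with `huggingface_hub` (the repo id `Wonder-Griffin/Shorsey-T2000` is assumed from the author and model names on this page; any recent `huggingface_hub` release that provides `get_safetensors_metadata` should work):

```python
from huggingface_hub import get_safetensors_metadata

# Repo id assumed from the author/model names on this page.
meta = get_safetensors_metadata("Wonder-Griffin/Shorsey-T2000")

print(meta.sharded)           # False -> a single model.safetensors file
print(meta.parameter_count)   # {'F32': 170799301} per the dump above

# Per-tensor dtype/shape information, as listed in files_metadata above
file_meta = meta.files_metadata["model.safetensors"]
for name, info in file_meta.tensors.items():
    print(f"{name}: {info.dtype} {info.shape}")
```

Per the dump, the checkpoint is a single unsharded `model.safetensors` file holding 170,799,301 F32 parameters.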
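For orientation, the tensor names and shapes in the weight map suggest a small hybrid network: a 60,000-token embedding at width 512, a fixed positional-encoding buffer, four transformer blocks with a 2048-wide feed-forward, a single bidirectional LSTM layer, and three 1024-wide output heads (`casual_lm_head`, presumably a causal-LM head, plus `general_head` and `qa_head`). The sketch below only mirrors those names and shapes; the class names, the number of attention heads, the positional-encoding values, and the forward wiring are assumptions, not the repository's actual code.

```python
import torch
import torch.nn as nn

class TransformerBlock(nn.Module):
    def __init__(self, d_model=512, n_heads=8, d_ff=2048):  # n_heads is assumed
        super().__init__()
        # in_proj_weight [1536, 512] = stacked Q/K/V projections at d_model=512
        self.attention = nn.ModuleDict(
            {"multihead_attn": nn.MultiheadAttention(d_model, n_heads, batch_first=True)}
        )
        self.feed_forward = nn.ModuleDict(
            {"fc1": nn.Linear(d_model, d_ff), "fc2": nn.Linear(d_ff, d_model)}
        )
        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        attn_out, _ = self.attention["multihead_attn"](x, x, x)
        x = self.layernorm1(x + attn_out)
        ff = self.feed_forward["fc2"](torch.relu(self.feed_forward["fc1"](x)))
        return self.layernorm2(x + ff)

class PositionalEncoding(nn.Module):
    def __init__(self, d_model=512, max_len=512):
        super().__init__()
        # pos_encoding.pe has shape [1, 512, 512] in the dump; zeros are a placeholder
        self.register_buffer("pe", torch.zeros(1, max_len, d_model))

    def forward(self, x):
        return x + self.pe[:, : x.size(1)]

class ShorseyT2000(nn.Module):  # hypothetical class name
    def __init__(self, vocab_size=60000, d_model=512, n_blocks=4, n_qa_labels=5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)        # [60000, 512]
        self.pos_encoding = PositionalEncoding(d_model)
        self.transformer_blocks = nn.ModuleList(
            TransformerBlock(d_model) for _ in range(n_blocks)
        )
        # bias_ih [2048] = 4 * hidden_size -> LSTM with hidden_size=512, bidirectional
        self.rnn = nn.LSTM(d_model, d_model, batch_first=True, bidirectional=True)
        self.casual_lm_head = nn.Linear(2 * d_model, vocab_size)  # [60000, 1024]
        self.general_head = nn.Linear(2 * d_model, vocab_size)    # [60000, 1024]
        self.qa_head = nn.Linear(2 * d_model, n_qa_labels)        # [5, 1024]

    def forward(self, input_ids):
        x = self.pos_encoding(self.embedding(input_ids))
        for block in self.transformer_blocks:
            x = block(x)
        x, _ = self.rnn(x)
        return self.casual_lm_head(x)

model = ShorseyT2000()
logits = model(torch.randint(0, 60000, (1, 16)))  # -> [1, 16, 60000]
```

This reconstruction is only meant to make the weight map readable; loading the repository's `model.safetensors` into it would require the state-dict keys and shapes to match exactly, which has not been verified here.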
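The hyperparameter list removed from the card matches the fields a Hugging Face `Trainer` run normally records. A minimal sketch of an equivalent `TrainingArguments` configuration, assuming that API was used (`output_dir` is a placeholder not stated in the card):

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="shorsey-t2000",          # assumed, not stated in the card
    learning_rate=5e-05,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    seed=42,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    lr_scheduler_type="linear",
)
```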