File size: 1,183 Bytes
d90b3a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Suggested data paths when using GPT-NeoX locally
{
  "data_path": "data/enwik8/enwik8_text_document",

  # or for weighted datasets:
  # "train-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
  # "test-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
  # "valid-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
  # "train-data-weights": [1., 2.],
  # "test-data-weights": [2., 1.],
  # "valid-data-weights": [0.5, 0.4],

  # If weight_by_num_documents is True, Builds dataset weights from a multinomial distribution over groups of data according to the number of documents in each group.
  # WARNING: setting this to True will override any user provided weights
  # "weight_by_num_documents": false,
  # "weighted_sampler_alpha": 0.3,

  "vocab_file": "data/gpt2-vocab.json",
  "merge_file": "data/gpt2-merges.txt",

  "save": "checkpoints",
  "load": "checkpoints",
  "checkpoint_validation_with_forward_pass": False,

  "tensorboard_dir": "tensorboard",
  "log_dir": "logs",
  "use_wandb": True,
  "wandb_host": "https://api.wandb.ai",
  "wandb_project": "neox"
}