sanchit-gandhi HF staff commited on
Commit
ab7be36
1 Parent(s): 382a3cb

Model save

Browse files
README.md CHANGED
@@ -1,15 +1,12 @@
1
  ---
2
  base_model: sanchit-gandhi/Mistral-7B-v0.1-6-layer
3
  tags:
4
- - alignment-handbook
5
- - trl
6
- - sft
7
- - generated_from_trainer
8
  - trl
9
  - sft
 
10
  - generated_from_trainer
11
  datasets:
12
- - stingning/ultrachat
13
  model-index:
14
  - name: sanchit-gandhi/Mistral-7B-v0.1-6-layer
15
  results: []
@@ -20,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # sanchit-gandhi/Mistral-7B-v0.1-6-layer
22
 
23
- This model is a fine-tuned version of [sanchit-gandhi/Mistral-7B-v0.1-6-layer](https://huggingface.co/sanchit-gandhi/Mistral-7B-v0.1-6-layer) on the stingning/ultrachat dataset.
24
  It achieves the following results on the evaluation set:
25
  - Loss: 1.0042
26
 
 
1
  ---
2
  base_model: sanchit-gandhi/Mistral-7B-v0.1-6-layer
3
  tags:
 
 
 
 
4
  - trl
5
  - sft
6
+ - alignment-handbook
7
  - generated_from_trainer
8
  datasets:
9
+ - generator
10
  model-index:
11
  - name: sanchit-gandhi/Mistral-7B-v0.1-6-layer
12
  results: []
 
17
 
18
  # sanchit-gandhi/Mistral-7B-v0.1-6-layer
19
 
20
+ This model is a fine-tuned version of [sanchit-gandhi/Mistral-7B-v0.1-6-layer](https://huggingface.co/sanchit-gandhi/Mistral-7B-v0.1-6-layer) on the generator dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 1.0042
23
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 4.944375772558715,
3
  "eval_loss": 1.0042184591293335,
4
- "eval_runtime": 1.489,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 429.142,
7
- "eval_steps_per_second": 2.015,
8
- "total_flos": 9.058112140235663e+19,
9
- "train_loss": 1.0958750234603882,
10
- "train_runtime": 36068.0493,
11
  "train_samples": 1467352,
12
- "train_samples_per_second": 141.954,
13
- "train_steps_per_second": 0.555
14
  }
 
1
  {
2
+ "epoch": 4.944622991347343,
3
  "eval_loss": 1.0042184591293335,
4
+ "eval_runtime": 1.4747,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 433.323,
7
+ "eval_steps_per_second": 2.034,
8
+ "total_flos": 9.058565045825516e+19,
9
+ "train_loss": 4.2969823139893115e-05,
10
+ "train_runtime": 9.0702,
11
  "train_samples": 1467352,
12
+ "train_samples_per_second": 564484.818,
13
+ "train_steps_per_second": 2205.019
14
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.40.1",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.40.1",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.944375772558715,
3
  "eval_loss": 1.0042184591293335,
4
- "eval_runtime": 1.489,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 429.142,
7
- "eval_steps_per_second": 2.015
8
  }
 
1
  {
2
+ "epoch": 4.944622991347343,
3
  "eval_loss": 1.0042184591293335,
4
+ "eval_runtime": 1.4747,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 433.323,
7
+ "eval_steps_per_second": 2.034
8
  }
runs/Apr25_23-48-22_ip-26-0-167-177/events.out.tfevents.1714088955.ip-26-0-167-177.211869.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5bb52086161b69b7bee8d1573821c6c75cfa78e0ebccc0e11f07cbeaa87a85
3
+ size 5066
runs/Apr25_23-48-22_ip-26-0-167-177/events.out.tfevents.1714088965.ip-26-0-167-177.211869.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de8c8ad8159f97d9fe023daef4c143bcbd4d78735d51d18574ae401864f72c77
3
+ size 364
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 4.944375772558715,
3
- "total_flos": 9.058112140235663e+19,
4
- "train_loss": 1.0958750234603882,
5
- "train_runtime": 36068.0493,
6
  "train_samples": 1467352,
7
- "train_samples_per_second": 141.954,
8
- "train_steps_per_second": 0.555
9
  }
 
1
  {
2
+ "epoch": 4.944622991347343,
3
+ "total_flos": 9.058565045825516e+19,
4
+ "train_loss": 4.2969823139893115e-05,
5
+ "train_runtime": 9.0702,
6
  "train_samples": 1467352,
7
+ "train_samples_per_second": 564484.818,
8
+ "train_steps_per_second": 2205.019
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.944375772558715,
5
  "eval_steps": 5000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5648,13 +5648,13 @@
5648
  "step": 20000
5649
  },
5650
  {
5651
- "epoch": 4.944375772558715,
5652
- "step": 20000,
5653
- "total_flos": 9.058112140235663e+19,
5654
- "train_loss": 1.0958750234603882,
5655
- "train_runtime": 36068.0493,
5656
- "train_samples_per_second": 141.954,
5657
- "train_steps_per_second": 0.555
5658
  }
5659
  ],
5660
  "logging_steps": 25,
@@ -5662,7 +5662,7 @@
5662
  "num_input_tokens_seen": 0,
5663
  "num_train_epochs": 5,
5664
  "save_steps": 5000,
5665
- "total_flos": 9.058112140235663e+19,
5666
  "train_batch_size": 32,
5667
  "trial_name": null,
5668
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.944622991347343,
5
  "eval_steps": 5000,
6
+ "global_step": 20001,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5648
  "step": 20000
5649
  },
5650
  {
5651
+ "epoch": 4.944622991347343,
5652
+ "step": 20001,
5653
+ "total_flos": 9.058565045825516e+19,
5654
+ "train_loss": 4.2969823139893115e-05,
5655
+ "train_runtime": 9.0702,
5656
+ "train_samples_per_second": 564484.818,
5657
+ "train_steps_per_second": 2205.019
5658
  }
5659
  ],
5660
  "logging_steps": 25,
 
5662
  "num_input_tokens_seen": 0,
5663
  "num_train_epochs": 5,
5664
  "save_steps": 5000,
5665
+ "total_flos": 9.058565045825516e+19,
5666
  "train_batch_size": 32,
5667
  "trial_name": null,
5668
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:781c0a2347f9f34d96f08a02d9262669403d18a6358c936df9c6d50431430243
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7260f27f85882e7b3af12df5cb1c852d833c8dcc1c57bc5b853702adf12a2c00
3
  size 4984
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log CHANGED
@@ -1,28 +1,28 @@
1
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
2
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Configure stats pid to 156194
3
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/settings
5
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
- 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py'}
8
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_setup.py:_flush():76] Applying login settings: {}
9
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:_log_setup():521] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/logs/debug.log
10
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:_log_setup():522] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/logs/debug-internal.log
11
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():561] calling init triggers
12
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
13
  config: {}
14
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():611] starting backend
15
- 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():615] setting up manager
16
- 2024-04-25 13:45:18,231 INFO MainThread:156194 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2024-04-25 13:45:18,239 INFO MainThread:156194 [wandb_init.py:init():623] backend started and connected
18
- 2024-04-25 13:45:18,241 INFO MainThread:156194 [wandb_init.py:init():715] updated telemetry
19
- 2024-04-25 13:45:18,267 INFO MainThread:156194 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
20
- 2024-04-25 13:45:18,506 INFO MainThread:156194 [wandb_run.py:_on_init():2357] communicating current version
21
- 2024-04-25 13:45:18,558 INFO MainThread:156194 [wandb_run.py:_on_init():2366] got version response
22
- 2024-04-25 13:45:18,558 INFO MainThread:156194 [wandb_init.py:init():799] starting run threads in backend
23
- 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_console_start():2335] atexit reg
24
- 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2190] redirect: wrap_raw
25
- 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2255] Wrapping output streams.
26
- 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2280] Redirects installed.
27
- 2024-04-25 13:45:22,709 INFO MainThread:156194 [wandb_init.py:init():842] run started, returning control to user process
28
- 2024-04-25 13:45:22,711 INFO MainThread:156194 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_13-44-28_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
 
1
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
2
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Configure stats pid to 211869
3
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/settings
5
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py'}
8
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:_log_setup():521] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/logs/debug.log
10
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:_log_setup():522] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/logs/debug-internal.log
11
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():561] calling init triggers
12
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
13
  config: {}
14
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():611] starting backend
15
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():615] setting up manager
16
+ 2024-04-25 23:49:16,508 INFO MainThread:211869 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-04-25 23:49:16,515 INFO MainThread:211869 [wandb_init.py:init():623] backend started and connected
18
+ 2024-04-25 23:49:16,518 INFO MainThread:211869 [wandb_init.py:init():715] updated telemetry
19
+ 2024-04-25 23:49:16,536 INFO MainThread:211869 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
20
+ 2024-04-25 23:49:16,777 INFO MainThread:211869 [wandb_run.py:_on_init():2357] communicating current version
21
+ 2024-04-25 23:49:16,826 INFO MainThread:211869 [wandb_run.py:_on_init():2366] got version response
22
+ 2024-04-25 23:49:16,826 INFO MainThread:211869 [wandb_init.py:init():799] starting run threads in backend
23
+ 2024-04-25 23:49:21,061 INFO MainThread:211869 [wandb_run.py:_console_start():2335] atexit reg
24
+ 2024-04-25 23:49:21,061 INFO MainThread:211869 [wandb_run.py:_redirect():2190] redirect: wrap_raw
25
+ 2024-04-25 23:49:21,061 INFO MainThread:211869 [wandb_run.py:_redirect():2255] Wrapping output streams.
26
+ 2024-04-25 23:49:21,062 INFO MainThread:211869 [wandb_run.py:_redirect():2280] Redirects installed.
27
+ 2024-04-25 23:49:21,063 INFO MainThread:211869 [wandb_init.py:init():842] run started, returning control to user process
28
+ 2024-04-25 23:49:21,064 INFO MainThread:211869 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_23-48-22_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
wandb/run-20240425_134518-etajcxpg/files/output.log CHANGED
@@ -18874,3 +18874,5 @@ Upload 2 LFS files: 100%|██████████| 2/2 [00:00<00:00, 5.07
18874
  [INFO|tokenization_utils_base.py:2488] 2024-04-25 23:47:40,804 >> tokenizer config file saved in ./tokenizer_config.json
18875
  [INFO|tokenization_utils_base.py:2497] 2024-04-25 23:47:40,806 >> Special tokens file saved in ./special_tokens_map.json
18876
  [INFO|modelcard.py:450] 2024-04-25 23:47:40,849 >> Dropping the following result as it does not have all the necessary fields:
 
 
 
18874
  [INFO|tokenization_utils_base.py:2488] 2024-04-25 23:47:40,804 >> tokenizer config file saved in ./tokenizer_config.json
18875
  [INFO|tokenization_utils_base.py:2497] 2024-04-25 23:47:40,806 >> Special tokens file saved in ./special_tokens_map.json
18876
  [INFO|modelcard.py:450] 2024-04-25 23:47:40,849 >> Dropping the following result as it does not have all the necessary fields:
18877
+ {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'dataset': {'name': 'stingning/ultrachat', 'type': 'stingning/ultrachat', 'config': 'default', 'split': 'train', 'args': 'default'}}
18878
+ 2024-04-25 23:47:45 - INFO - __main__ - *** Training complete ***
wandb/run-20240425_134518-etajcxpg/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/loss": 0.8191, "train/grad_norm": 0.6796875, "train/learning_rate": 0.0, "train/epoch": 4.944375772558715, "train/global_step": 20000, "_timestamp": 1714088841.5880787, "_runtime": 36123.348269701004, "_step": 806, "eval/loss": 1.0042184591293335, "eval/runtime": 1.489, "eval/samples_per_second": 429.142, "eval/steps_per_second": 2.015, "train_runtime": 36068.0493, "train_samples_per_second": 141.954, "train_steps_per_second": 0.555, "total_flos": 9.058112140235663e+19, "train_loss": 1.0958750234603882}
 
1
+ {"train/loss": 0.8191, "train/grad_norm": 0.6796875, "train/learning_rate": 0.0, "train/epoch": 4.944375772558715, "train/global_step": 20000, "_timestamp": 1714088841.5880787, "_runtime": 36123.348269701004, "_step": 806, "eval/loss": 1.0042184591293335, "eval/runtime": 1.489, "eval/samples_per_second": 429.142, "eval/steps_per_second": 2.015, "train_runtime": 36068.0493, "train_samples_per_second": 141.954, "train_steps_per_second": 0.555, "total_flos": 9.058112140235663e+19, "train_loss": 1.0958750234603882, "_wandb": {"runtime": 36146}}
wandb/run-20240425_134518-etajcxpg/logs/debug-internal.log CHANGED
@@ -37486,3 +37486,146 @@
37486
  2024-04-25 23:47:38,974 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: status_report
37487
  2024-04-25 23:47:41,480 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: internal_messages
37488
  2024-04-25 23:47:42,275 INFO Thread-12 :156911 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/output.log
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37486
  2024-04-25 23:47:38,974 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: status_report
37487
  2024-04-25 23:47:41,480 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: internal_messages
37488
  2024-04-25 23:47:42,275 INFO Thread-12 :156911 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/output.log
37489
+ 2024-04-25 23:47:44,851 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: status_report
37490
+ 2024-04-25 23:47:45,210 DEBUG SenderThread:156911 [sender.py:send():379] send: exit
37491
+ 2024-04-25 23:47:45,210 INFO SenderThread:156911 [sender.py:send_exit():586] handling exit code: 0
37492
+ 2024-04-25 23:47:45,211 INFO SenderThread:156911 [sender.py:send_exit():588] handling runtime: 36146
37493
+ 2024-04-25 23:47:45,213 INFO SenderThread:156911 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
37494
+ 2024-04-25 23:47:45,213 INFO SenderThread:156911 [sender.py:send_exit():594] send defer
37495
+ 2024-04-25 23:47:45,213 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37496
+ 2024-04-25 23:47:45,213 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 0
37497
+ 2024-04-25 23:47:45,213 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37498
+ 2024-04-25 23:47:45,213 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 0
37499
+ 2024-04-25 23:47:45,213 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 1
37500
+ 2024-04-25 23:47:45,213 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37501
+ 2024-04-25 23:47:45,213 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 1
37502
+ 2024-04-25 23:47:45,213 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37503
+ 2024-04-25 23:47:45,213 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 1
37504
+ 2024-04-25 23:47:45,213 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 2
37505
+ 2024-04-25 23:47:45,214 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37506
+ 2024-04-25 23:47:45,214 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 2
37507
+ 2024-04-25 23:47:45,214 INFO HandlerThread:156911 [system_monitor.py:finish():203] Stopping system monitor
37508
+ 2024-04-25 23:47:45,214 DEBUG SystemMonitor:156911 [system_monitor.py:_start():179] Finished system metrics aggregation loop
37509
+ 2024-04-25 23:47:45,215 INFO HandlerThread:156911 [interfaces.py:finish():202] Joined cpu monitor
37510
+ 2024-04-25 23:47:45,215 DEBUG SystemMonitor:156911 [system_monitor.py:_start():183] Publishing last batch of metrics
37511
+ 2024-04-25 23:47:45,215 INFO HandlerThread:156911 [interfaces.py:finish():202] Joined disk monitor
37512
+ 2024-04-25 23:47:45,280 INFO Thread-12 :156911 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/wandb-summary.json
37513
+ 2024-04-25 23:47:45,963 INFO HandlerThread:156911 [interfaces.py:finish():202] Joined gpu monitor
37514
+ 2024-04-25 23:47:45,963 INFO HandlerThread:156911 [interfaces.py:finish():202] Joined memory monitor
37515
+ 2024-04-25 23:47:45,963 INFO HandlerThread:156911 [interfaces.py:finish():202] Joined network monitor
37516
+ 2024-04-25 23:47:45,964 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37517
+ 2024-04-25 23:47:45,964 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 2
37518
+ 2024-04-25 23:47:45,964 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 3
37519
+ 2024-04-25 23:47:45,965 DEBUG SenderThread:156911 [sender.py:send():379] send: stats
37520
+ 2024-04-25 23:47:45,965 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37521
+ 2024-04-25 23:47:45,965 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 3
37522
+ 2024-04-25 23:47:45,965 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37523
+ 2024-04-25 23:47:45,965 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 3
37524
+ 2024-04-25 23:47:45,965 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 4
37525
+ 2024-04-25 23:47:45,965 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37526
+ 2024-04-25 23:47:45,966 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 4
37527
+ 2024-04-25 23:47:45,966 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37528
+ 2024-04-25 23:47:45,966 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 4
37529
+ 2024-04-25 23:47:45,966 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 5
37530
+ 2024-04-25 23:47:45,966 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37531
+ 2024-04-25 23:47:45,966 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 5
37532
+ 2024-04-25 23:47:45,966 DEBUG SenderThread:156911 [sender.py:send():379] send: summary
37533
+ 2024-04-25 23:47:45,968 INFO SenderThread:156911 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
37534
+ 2024-04-25 23:47:45,968 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37535
+ 2024-04-25 23:47:45,968 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 5
37536
+ 2024-04-25 23:47:45,968 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 6
37537
+ 2024-04-25 23:47:45,968 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37538
+ 2024-04-25 23:47:45,968 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 6
37539
+ 2024-04-25 23:47:45,968 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37540
+ 2024-04-25 23:47:45,968 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 6
37541
+ 2024-04-25 23:47:45,968 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 7
37542
+ 2024-04-25 23:47:45,968 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: status_report
37543
+ 2024-04-25 23:47:45,968 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37544
+ 2024-04-25 23:47:45,968 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 7
37545
+ 2024-04-25 23:47:45,969 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37546
+ 2024-04-25 23:47:45,969 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 7
37547
+ 2024-04-25 23:47:46,210 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: poll_exit
37548
+ 2024-04-25 23:47:48,284 INFO Thread-12 :156911 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/output.log
37549
+ 2024-04-25 23:47:49,328 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 8
37550
+ 2024-04-25 23:47:49,328 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: poll_exit
37551
+ 2024-04-25 23:47:49,328 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37552
+ 2024-04-25 23:47:49,328 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 8
37553
+ 2024-04-25 23:47:49,329 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37554
+ 2024-04-25 23:47:49,329 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 8
37555
+ 2024-04-25 23:47:49,329 INFO SenderThread:156911 [job_builder.py:build():318] Attempting to build job artifact
37556
+ 2024-04-25 23:47:49,330 INFO SenderThread:156911 [job_builder.py:_get_source_type():455] is repo sourced job
37557
+ 2024-04-25 23:47:49,436 INFO SenderThread:156911 [job_builder.py:build():431] adding wandb-job metadata file
37558
+ 2024-04-25 23:47:49,454 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 9
37559
+ 2024-04-25 23:47:49,454 DEBUG SenderThread:156911 [sender.py:send():379] send: artifact
37560
+ 2024-04-25 23:47:49,454 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37561
+ 2024-04-25 23:47:49,456 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 9
37562
+ 2024-04-25 23:47:50,155 INFO wandb-upload_0:156911 [upload_job.py:push():89] Uploaded file /tmp/tmpt5uhix_o/wandb-job.json
37563
+ 2024-04-25 23:47:50,174 INFO wandb-upload_1:156911 [upload_job.py:push():89] Uploaded file /admin/home/sanchit/.local/share/wandb/artifacts/staging/tmpslw5rny7
37564
+ 2024-04-25 23:47:50,212 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: poll_exit
37565
+ 2024-04-25 23:47:50,286 INFO Thread-12 :156911 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/output.log
37566
+ 2024-04-25 23:47:50,813 INFO SenderThread:156911 [sender.py:send_artifact():1468] sent artifact job-https___huggingface.co_sanchit-gandhi_distil-zephyr-1.5b-ssft-ultrachat_run_sft.py - {'id': 'QXJ0aWZhY3Q6ODEwNTI5OTk0', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjE2NjI0NzU4Nw==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6ODA4NTQyNDIx', 'versionIndex': 0}}}
37567
+ 2024-04-25 23:47:50,813 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37568
+ 2024-04-25 23:47:50,813 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 9
37569
+ 2024-04-25 23:47:50,813 INFO SenderThread:156911 [dir_watcher.py:finish():358] shutting down directory watcher
37570
+ 2024-04-25 23:47:51,289 INFO SenderThread:156911 [dir_watcher.py:finish():388] scan: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files
37571
+ 2024-04-25 23:47:51,289 INFO SenderThread:156911 [dir_watcher.py:finish():402] scan save: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/wandb-metadata.json wandb-metadata.json
37572
+ 2024-04-25 23:47:51,289 INFO SenderThread:156911 [dir_watcher.py:finish():402] scan save: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/config.yaml config.yaml
37573
+ 2024-04-25 23:47:51,289 INFO SenderThread:156911 [dir_watcher.py:finish():402] scan save: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/conda-environment.yaml conda-environment.yaml
37574
+ 2024-04-25 23:47:51,290 INFO SenderThread:156911 [dir_watcher.py:finish():402] scan save: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/output.log output.log
37575
+ 2024-04-25 23:47:51,293 INFO SenderThread:156911 [dir_watcher.py:finish():402] scan save: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/requirements.txt requirements.txt
37576
+ 2024-04-25 23:47:51,296 INFO SenderThread:156911 [dir_watcher.py:finish():402] scan save: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/wandb-summary.json wandb-summary.json
37577
+ 2024-04-25 23:47:51,298 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 10
37578
+ 2024-04-25 23:47:51,298 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: poll_exit
37579
+ 2024-04-25 23:47:51,298 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37580
+ 2024-04-25 23:47:51,301 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 10
37581
+ 2024-04-25 23:47:51,301 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37582
+ 2024-04-25 23:47:51,301 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 10
37583
+ 2024-04-25 23:47:51,301 INFO SenderThread:156911 [file_pusher.py:finish():172] shutting down file pusher
37584
+ 2024-04-25 23:47:51,477 INFO wandb-upload_0:156911 [upload_job.py:push():131] Uploaded file /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/config.yaml
37585
+ 2024-04-25 23:47:51,602 INFO wandb-upload_1:156911 [upload_job.py:push():131] Uploaded file /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/conda-environment.yaml
37586
+ 2024-04-25 23:47:51,684 INFO wandb-upload_2:156911 [upload_job.py:push():131] Uploaded file /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/output.log
37587
+ 2024-04-25 23:47:51,688 INFO wandb-upload_3:156911 [upload_job.py:push():131] Uploaded file /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/requirements.txt
37588
+ 2024-04-25 23:47:51,720 INFO wandb-upload_4:156911 [upload_job.py:push():131] Uploaded file /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/files/wandb-summary.json
37589
+ 2024-04-25 23:47:51,920 INFO Thread-11 (_thread_body):156911 [sender.py:transition_state():614] send defer: 11
37590
+ 2024-04-25 23:47:51,921 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37591
+ 2024-04-25 23:47:51,921 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 11
37592
+ 2024-04-25 23:47:51,921 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37593
+ 2024-04-25 23:47:51,921 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 11
37594
+ 2024-04-25 23:47:51,921 INFO SenderThread:156911 [file_pusher.py:join():178] waiting for file pusher
37595
+ 2024-04-25 23:47:51,921 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 12
37596
+ 2024-04-25 23:47:51,922 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37597
+ 2024-04-25 23:47:51,922 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 12
37598
+ 2024-04-25 23:47:51,922 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37599
+ 2024-04-25 23:47:51,922 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 12
37600
+ 2024-04-25 23:47:51,922 INFO SenderThread:156911 [file_stream.py:finish():614] file stream finish called
37601
+ 2024-04-25 23:47:52,002 INFO SenderThread:156911 [file_stream.py:finish():618] file stream finish is done
37602
+ 2024-04-25 23:47:52,002 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 13
37603
+ 2024-04-25 23:47:52,002 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37604
+ 2024-04-25 23:47:52,002 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 13
37605
+ 2024-04-25 23:47:52,002 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37606
+ 2024-04-25 23:47:52,002 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 13
37607
+ 2024-04-25 23:47:52,002 INFO SenderThread:156911 [sender.py:transition_state():614] send defer: 14
37608
+ 2024-04-25 23:47:52,003 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: defer
37609
+ 2024-04-25 23:47:52,003 DEBUG SenderThread:156911 [sender.py:send():379] send: final
37610
+ 2024-04-25 23:47:52,003 INFO HandlerThread:156911 [handler.py:handle_request_defer():172] handle defer: 14
37611
+ 2024-04-25 23:47:52,003 DEBUG SenderThread:156911 [sender.py:send():379] send: footer
37612
+ 2024-04-25 23:47:52,003 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: defer
37613
+ 2024-04-25 23:47:52,003 INFO SenderThread:156911 [sender.py:send_request_defer():610] handle sender defer: 14
37614
+ 2024-04-25 23:47:52,003 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: poll_exit
37615
+ 2024-04-25 23:47:52,004 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: poll_exit
37616
+ 2024-04-25 23:47:52,004 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: poll_exit
37617
+ 2024-04-25 23:47:52,004 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: server_info
37618
+ 2024-04-25 23:47:52,004 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: poll_exit
37619
+ 2024-04-25 23:47:52,004 DEBUG SenderThread:156911 [sender.py:send_request():406] send_request: server_info
37620
+ 2024-04-25 23:47:52,005 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: get_summary
37621
+ 2024-04-25 23:47:52,006 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: sampled_history
37622
+ 2024-04-25 23:47:52,008 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: internal_messages
37623
+ 2024-04-25 23:47:52,058 INFO MainThread:156911 [wandb_run.py:_footer_history_summary_info():3936] rendering history
37624
+ 2024-04-25 23:47:52,059 INFO MainThread:156911 [wandb_run.py:_footer_history_summary_info():3968] rendering summary
37625
+ 2024-04-25 23:47:52,060 INFO MainThread:156911 [wandb_run.py:_footer_sync_info():3895] logging synced files
37626
+ 2024-04-25 23:47:52,060 DEBUG HandlerThread:156911 [handler.py:handle_request():146] handle_request: shutdown
37627
+ 2024-04-25 23:47:52,060 INFO HandlerThread:156911 [handler.py:finish():866] shutting down handler
37628
+ 2024-04-25 23:47:53,004 INFO WriterThread:156911 [datastore.py:close():296] close: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/run-etajcxpg.wandb
37629
+ 2024-04-25 23:47:53,058 INFO SenderThread:156911 [sender.py:finish():1546] shutting down sender
37630
+ 2024-04-25 23:47:53,059 INFO SenderThread:156911 [file_pusher.py:finish():172] shutting down file pusher
37631
+ 2024-04-25 23:47:53,059 INFO SenderThread:156911 [file_pusher.py:join():178] waiting for file pusher
wandb/run-20240425_134518-etajcxpg/logs/debug.log CHANGED
@@ -26,3 +26,4 @@ config: {}
26
  2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2280] Redirects installed.
27
  2024-04-25 13:45:22,709 INFO MainThread:156194 [wandb_init.py:init():842] run started, returning control to user process
28
  2024-04-25 13:45:22,711 INFO MainThread:156194 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_13-44-28_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
 
 
26
  2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2280] Redirects installed.
27
  2024-04-25 13:45:22,709 INFO MainThread:156194 [wandb_init.py:init():842] run started, returning control to user process
28
  2024-04-25 13:45:22,711 INFO MainThread:156194 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_13-44-28_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
29
+ 2024-04-25 23:47:53,062 WARNING MsgRouterThr:156194 [router.py:message_loop():77] message_loop has been closed
wandb/run-20240425_134518-etajcxpg/run-etajcxpg.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a76851c2a0cca9f9fa32a212b4f4df28d1dc0eaa2ac3e93dec53f87fc69e3d2
3
- size 9699339
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de5e99632979ecd500d1f6bef80d0b60abaf19f5e363ea3ee3d4842e1fe79c1f
3
+ size 9712391
wandb/run-20240425_234916-ozdw63qu/files/conda-environment.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: alignment
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - _libgcc_mutex=0.1=main
6
+ - _openmp_mutex=5.1=1_gnu
7
+ - bzip2=1.0.8=h5eee18b_5
8
+ - ca-certificates=2024.3.11=h06a4308_0
9
+ - ld_impl_linux-64=2.38=h1181459_1
10
+ - libffi=3.4.4=h6a678d5_0
11
+ - libgcc-ng=11.2.0=h1234567_1
12
+ - libgomp=11.2.0=h1234567_1
13
+ - libstdcxx-ng=11.2.0=h1234567_1
14
+ - libuuid=1.41.5=h5eee18b_0
15
+ - ncurses=6.4=h6a678d5_0
16
+ - openssl=3.0.13=h7f8727e_0
17
+ - pip=23.3.1=py311h06a4308_0
18
+ - python=3.11.9=h955ad1f_0
19
+ - readline=8.2=h5eee18b_0
20
+ - setuptools=68.2.2=py311h06a4308_0
21
+ - sqlite=3.41.2=h5eee18b_0
22
+ - tk=8.6.12=h1ccaba5_0
23
+ - wheel=0.41.2=py311h06a4308_0
24
+ - xz=5.4.6=h5eee18b_0
25
+ - zlib=1.2.13=h5eee18b_0
26
+ - pip:
27
+ - absl-py==2.1.0
28
+ - accelerate==0.29.3
29
+ - aiohttp==3.9.5
30
+ - aiosignal==1.3.1
31
+ - annotated-types==0.6.0
32
+ - appdirs==1.4.4
33
+ - attrs==23.2.0
34
+ - bitsandbytes==0.43.1
35
+ - certifi==2024.2.2
36
+ - charset-normalizer==3.3.2
37
+ - click==8.1.7
38
+ - datasets==2.19.0
39
+ - deepspeed==0.14.2
40
+ - dill==0.3.8
41
+ - docker-pycreds==0.4.0
42
+ - docstring-parser==0.16
43
+ - einops==0.7.0
44
+ - evaluate==0.4.1
45
+ - filelock==3.13.4
46
+ - frozenlist==1.4.1
47
+ - fsspec==2024.3.1
48
+ - gitdb==4.0.11
49
+ - gitpython==3.1.43
50
+ - grpcio==1.62.2
51
+ - hf-transfer==0.1.6
52
+ - hjson==3.1.0
53
+ - huggingface-hub==0.22.2
54
+ - idna==3.7
55
+ - jinja2==3.1.3
56
+ - markdown==3.6
57
+ - markdown-it-py==3.0.0
58
+ - markupsafe==2.1.5
59
+ - mdurl==0.1.2
60
+ - mpmath==1.3.0
61
+ - multidict==6.0.5
62
+ - multiprocess==0.70.16
63
+ - networkx==3.3
64
+ - ninja==1.11.1.1
65
+ - numpy==1.26.4
66
+ - nvidia-cublas-cu12==12.1.3.1
67
+ - nvidia-cuda-cupti-cu12==12.1.105
68
+ - nvidia-cuda-nvrtc-cu12==12.1.105
69
+ - nvidia-cuda-runtime-cu12==12.1.105
70
+ - nvidia-cudnn-cu12==8.9.2.26
71
+ - nvidia-cufft-cu12==11.0.2.54
72
+ - nvidia-curand-cu12==10.3.2.106
73
+ - nvidia-cusolver-cu12==11.4.5.107
74
+ - nvidia-cusparse-cu12==12.1.0.106
75
+ - nvidia-nccl-cu12==2.19.3
76
+ - nvidia-nvjitlink-cu12==12.4.127
77
+ - nvidia-nvtx-cu12==12.1.105
78
+ - packaging==24.0
79
+ - pandas==2.2.2
80
+ - peft==0.10.0
81
+ - pillow==10.3.0
82
+ - protobuf==3.20.2
83
+ - psutil==5.9.8
84
+ - py-cpuinfo==9.0.0
85
+ - pyarrow==16.0.0
86
+ - pyarrow-hotfix==0.6
87
+ - pydantic==2.7.1
88
+ - pydantic-core==2.18.2
89
+ - pygments==2.17.2
90
+ - pynvml==11.5.0
91
+ - python-dateutil==2.9.0.post0
92
+ - pytz==2024.1
93
+ - pyyaml==6.0.1
94
+ - regex==2024.4.16
95
+ - requests==2.31.0
96
+ - responses==0.18.0
97
+ - rich==13.7.1
98
+ - safetensors==0.4.3
99
+ - scipy==1.13.0
100
+ - sentencepiece==0.2.0
101
+ - sentry-sdk==2.0.0
102
+ - setproctitle==1.3.3
103
+ - shtab==1.7.1
104
+ - six==1.16.0
105
+ - smmap==5.0.1
106
+ - sympy==1.12
107
+ - tensorboard==2.16.2
108
+ - tensorboard-data-server==0.7.2
109
+ - tokenizers==0.19.1
110
+ - torch==2.2.2
111
+ - torchaudio==2.2.2
112
+ - torchvision==0.17.2
113
+ - tqdm==4.66.2
114
+ - transformers==4.40.1
115
+ - triton==2.2.0
116
+ - trl==0.8.6
117
+ - typing-extensions==4.11.0
118
+ - tyro==0.8.3
119
+ - tzdata==2024.1
120
+ - urllib3==2.2.1
121
+ - wandb==0.16.6
122
+ - werkzeug==3.0.2
123
+ - xxhash==3.4.1
124
+ - yarl==1.9.4
125
+ prefix: /fsx/sanchit/miniconda3/envs/alignment
wandb/run-20240425_234916-ozdw63qu/files/config.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.11.9
7
+ cli_version: 0.16.6
8
+ framework: huggingface
9
+ huggingface_version: 4.40.1
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1714088956.0
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 11
17
+ - 49
18
+ - 51
19
+ - 55
20
+ - 71
21
+ - 84
22
+ - 98
23
+ 3:
24
+ - 23
25
+ 4: 3.11.9
26
+ 5: 0.16.6
27
+ 6: 4.40.1
28
+ 8:
29
+ - 5
30
+ 13: linux-x86_64
wandb/run-20240425_234916-ozdw63qu/files/output.log ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ 20001it [00:03, 6357.99it/s] [INFO|trainer.py:2316] 2024-04-25 23:49:24,222 >>
3
+ Training completed. Do not forget to share your model on huggingface.co/models =)
4
+ 20001it [00:03, 6316.44it/s]
5
+ [INFO|trainer.py:3614] 2024-04-25 23:49:24,260 >> ***** Running Evaluation *****
6
+ [INFO|trainer.py:3616] 2024-04-25 23:49:24,260 >> Num examples = 639
7
+ [INFO|trainer.py:3619] 2024-04-25 23:49:24,260 >> Batch size = 32
8
+ 0%| | 0/3 [00:00<?, ?it/s]
9
+ {'train_runtime': 9.0702, 'train_samples_per_second': 564484.818, 'train_steps_per_second': 2205.019, 'train_loss': 4.2969823139893115e-05, 'epoch': 4.94}
10
+ ***** train metrics *****
11
+ epoch = 4.9446
12
+ total_flos = 84364461208GF
13
+ train_loss = 0.0
14
+ train_runtime = 0:00:09.07
15
+ train_samples = 1467352
16
+ train_samples_per_second = 564484.818
17
+ train_steps_per_second = 2205.019
18
+ 2024-04-25 23:49:24 - INFO - __main__ - *** Evaluate ***
19
+ ***** eval metrics *****
20
+ epoch = 4.9446
21
+ eval_loss = 1.0042
22
+ eval_runtime = 0:00:01.47
23
+ eval_samples = 1000
24
+ eval_samples_per_second = 433.323
25
+ eval_steps_per_second = 2.034
26
+ 100%|██████████| 3/3 [00:01<00:00, 2.17it/s]
27
+ [INFO|trainer.py:3305] 2024-04-25 23:49:25,759 >> Saving model checkpoint to ./
28
+ [INFO|configuration_utils.py:471] 2024-04-25 23:49:25,761 >> Configuration saved in ./config.json
29
+ [INFO|configuration_utils.py:697] 2024-04-25 23:49:25,763 >> Configuration saved in ./generation_config.json
30
+ [INFO|modeling_utils.py:2590] 2024-04-25 23:49:30,656 >> Model weights saved in ./model.safetensors
31
+ [INFO|tokenization_utils_base.py:2488] 2024-04-25 23:49:30,659 >> tokenizer config file saved in ./tokenizer_config.json
32
+ [INFO|tokenization_utils_base.py:2497] 2024-04-25 23:49:30,661 >> Special tokens file saved in ./special_tokens_map.json
33
+ [INFO|trainer.py:3305] 2024-04-25 23:49:30,684 >> Saving model checkpoint to ./
34
+ [INFO|configuration_utils.py:471] 2024-04-25 23:49:30,686 >> Configuration saved in ./config.json
35
+ [INFO|configuration_utils.py:697] 2024-04-25 23:49:30,687 >> Configuration saved in ./generation_config.json
36
+ [INFO|modeling_utils.py:2590] 2024-04-25 23:49:35,765 >> Model weights saved in ./model.safetensors
37
+ [INFO|tokenization_utils_base.py:2488] 2024-04-25 23:49:35,768 >> tokenizer config file saved in ./tokenizer_config.json
38
+ [INFO|tokenization_utils_base.py:2497] 2024-04-25 23:49:35,769 >> Special tokens file saved in ./special_tokens_map.json
39
+ [INFO|modelcard.py:450] 2024-04-25 23:49:35,816 >> Dropping the following result as it does not have all the necessary fields:
wandb/run-20240425_234916-ozdw63qu/files/requirements.txt ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GitPython==3.1.43
2
+ Jinja2==3.1.3
3
+ Markdown==3.6
4
+ MarkupSafe==2.1.5
5
+ PyYAML==6.0.1
6
+ Pygments==2.17.2
7
+ Werkzeug==3.0.2
8
+ absl-py==2.1.0
9
+ accelerate==0.29.3
10
+ aiohttp==3.9.5
11
+ aiosignal==1.3.1
12
+ alignment-handbook==0.4.0.dev0
13
+ annotated-types==0.6.0
14
+ appdirs==1.4.4
15
+ attrs==23.2.0
16
+ bitsandbytes==0.43.1
17
+ certifi==2024.2.2
18
+ charset-normalizer==3.3.2
19
+ click==8.1.7
20
+ datasets==2.19.0
21
+ deepspeed==0.14.2
22
+ dill==0.3.8
23
+ docker-pycreds==0.4.0
24
+ docstring_parser==0.16
25
+ einops==0.7.0
26
+ evaluate==0.4.1
27
+ filelock==3.13.4
28
+ frozenlist==1.4.1
29
+ fsspec==2024.3.1
30
+ gitdb==4.0.11
31
+ grpcio==1.62.2
32
+ hf_transfer==0.1.6
33
+ hjson==3.1.0
34
+ huggingface-hub==0.22.2
35
+ idna==3.7
36
+ markdown-it-py==3.0.0
37
+ mdurl==0.1.2
38
+ mpmath==1.3.0
39
+ multidict==6.0.5
40
+ multiprocess==0.70.16
41
+ networkx==3.3
42
+ ninja==1.11.1.1
43
+ numpy==1.26.4
44
+ nvidia-cublas-cu12==12.1.3.1
45
+ nvidia-cuda-cupti-cu12==12.1.105
46
+ nvidia-cuda-nvrtc-cu12==12.1.105
47
+ nvidia-cuda-runtime-cu12==12.1.105
48
+ nvidia-cudnn-cu12==8.9.2.26
49
+ nvidia-cufft-cu12==11.0.2.54
50
+ nvidia-curand-cu12==10.3.2.106
51
+ nvidia-cusolver-cu12==11.4.5.107
52
+ nvidia-cusparse-cu12==12.1.0.106
53
+ nvidia-nccl-cu12==2.19.3
54
+ nvidia-nvjitlink-cu12==12.4.127
55
+ nvidia-nvtx-cu12==12.1.105
56
+ packaging==24.0
57
+ pandas==2.2.2
58
+ peft==0.10.0
59
+ pillow==10.3.0
60
+ pip==23.3.1
61
+ protobuf==3.20.2
62
+ psutil==5.9.8
63
+ py-cpuinfo==9.0.0
64
+ pyarrow-hotfix==0.6
65
+ pyarrow==16.0.0
66
+ pydantic==2.7.1
67
+ pydantic_core==2.18.2
68
+ pynvml==11.5.0
69
+ python-dateutil==2.9.0.post0
70
+ pytz==2024.1
71
+ regex==2024.4.16
72
+ requests==2.31.0
73
+ responses==0.18.0
74
+ rich==13.7.1
75
+ safetensors==0.4.3
76
+ scipy==1.13.0
77
+ sentencepiece==0.2.0
78
+ sentry-sdk==2.0.0
79
+ setproctitle==1.3.3
80
+ setuptools==68.2.2
81
+ shtab==1.7.1
82
+ six==1.16.0
83
+ smmap==5.0.1
84
+ sympy==1.12
85
+ tensorboard-data-server==0.7.2
86
+ tensorboard==2.16.2
87
+ tokenizers==0.19.1
88
+ torch==2.2.2
89
+ torchaudio==2.2.2
90
+ torchvision==0.17.2
91
+ tqdm==4.66.2
92
+ transformers==4.40.1
93
+ triton==2.2.0
94
+ trl==0.8.6
95
+ typing_extensions==4.11.0
96
+ tyro==0.8.3
97
+ tzdata==2024.1
98
+ urllib3==2.2.1
99
+ wandb==0.16.6
100
+ wheel==0.41.2
101
+ xxhash==3.4.1
102
+ yarl==1.9.4
wandb/run-20240425_234916-ozdw63qu/files/wandb-metadata.json ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-1048-aws-x86_64-with-glibc2.31",
3
+ "python": "3.11.9",
4
+ "heartbeatAt": "2024-04-25T23:49:16.939535",
5
+ "startedAt": "2024-04-25T23:49:16.496864",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "./config_full.yaml"
10
+ ],
11
+ "state": "running",
12
+ "program": "/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py",
13
+ "codePathLocal": "run_sft.py",
14
+ "codePath": "run_sft.py",
15
+ "git": {
16
+ "remote": "https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft-ultrachat",
17
+ "commit": "cbea69c6b95c970317a1e47c3f614b55b33f8ed9"
18
+ },
19
+ "email": null,
20
+ "root": "/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat",
21
+ "host": "ip-26-0-167-177",
22
+ "username": "sanchit",
23
+ "executable": "/fsx/sanchit/miniconda3/envs/alignment/bin/python",
24
+ "cpu_count": 96,
25
+ "cpu_count_logical": 96,
26
+ "cpu_freq": {
27
+ "current": 2718.7540416666648,
28
+ "min": 0.0,
29
+ "max": 0.0
30
+ },
31
+ "cpu_freq_per_core": [
32
+ {
33
+ "current": 2603.523,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 2649.998,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 2649.998,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 2649.998,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 2649.998,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 2649.998,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ },
62
+ {
63
+ "current": 2649.998,
64
+ "min": 0.0,
65
+ "max": 0.0
66
+ },
67
+ {
68
+ "current": 2649.998,
69
+ "min": 0.0,
70
+ "max": 0.0
71
+ },
72
+ {
73
+ "current": 2649.998,
74
+ "min": 0.0,
75
+ "max": 0.0
76
+ },
77
+ {
78
+ "current": 2649.998,
79
+ "min": 0.0,
80
+ "max": 0.0
81
+ },
82
+ {
83
+ "current": 2649.998,
84
+ "min": 0.0,
85
+ "max": 0.0
86
+ },
87
+ {
88
+ "current": 2649.998,
89
+ "min": 0.0,
90
+ "max": 0.0
91
+ },
92
+ {
93
+ "current": 2649.998,
94
+ "min": 0.0,
95
+ "max": 0.0
96
+ },
97
+ {
98
+ "current": 2649.998,
99
+ "min": 0.0,
100
+ "max": 0.0
101
+ },
102
+ {
103
+ "current": 2649.998,
104
+ "min": 0.0,
105
+ "max": 0.0
106
+ },
107
+ {
108
+ "current": 2649.998,
109
+ "min": 0.0,
110
+ "max": 0.0
111
+ },
112
+ {
113
+ "current": 2649.998,
114
+ "min": 0.0,
115
+ "max": 0.0
116
+ },
117
+ {
118
+ "current": 2649.998,
119
+ "min": 0.0,
120
+ "max": 0.0
121
+ },
122
+ {
123
+ "current": 2649.998,
124
+ "min": 0.0,
125
+ "max": 0.0
126
+ },
127
+ {
128
+ "current": 2649.998,
129
+ "min": 0.0,
130
+ "max": 0.0
131
+ },
132
+ {
133
+ "current": 2649.998,
134
+ "min": 0.0,
135
+ "max": 0.0
136
+ },
137
+ {
138
+ "current": 2649.998,
139
+ "min": 0.0,
140
+ "max": 0.0
141
+ },
142
+ {
143
+ "current": 2649.998,
144
+ "min": 0.0,
145
+ "max": 0.0
146
+ },
147
+ {
148
+ "current": 2649.998,
149
+ "min": 0.0,
150
+ "max": 0.0
151
+ },
152
+ {
153
+ "current": 2649.998,
154
+ "min": 0.0,
155
+ "max": 0.0
156
+ },
157
+ {
158
+ "current": 2649.998,
159
+ "min": 0.0,
160
+ "max": 0.0
161
+ },
162
+ {
163
+ "current": 2649.998,
164
+ "min": 0.0,
165
+ "max": 0.0
166
+ },
167
+ {
168
+ "current": 2649.998,
169
+ "min": 0.0,
170
+ "max": 0.0
171
+ },
172
+ {
173
+ "current": 2649.998,
174
+ "min": 0.0,
175
+ "max": 0.0
176
+ },
177
+ {
178
+ "current": 3596.748,
179
+ "min": 0.0,
180
+ "max": 0.0
181
+ },
182
+ {
183
+ "current": 2649.998,
184
+ "min": 0.0,
185
+ "max": 0.0
186
+ },
187
+ {
188
+ "current": 2649.998,
189
+ "min": 0.0,
190
+ "max": 0.0
191
+ },
192
+ {
193
+ "current": 2649.998,
194
+ "min": 0.0,
195
+ "max": 0.0
196
+ },
197
+ {
198
+ "current": 2649.998,
199
+ "min": 0.0,
200
+ "max": 0.0
201
+ },
202
+ {
203
+ "current": 3596.723,
204
+ "min": 0.0,
205
+ "max": 0.0
206
+ },
207
+ {
208
+ "current": 2649.998,
209
+ "min": 0.0,
210
+ "max": 0.0
211
+ },
212
+ {
213
+ "current": 2649.998,
214
+ "min": 0.0,
215
+ "max": 0.0
216
+ },
217
+ {
218
+ "current": 2649.998,
219
+ "min": 0.0,
220
+ "max": 0.0
221
+ },
222
+ {
223
+ "current": 2649.998,
224
+ "min": 0.0,
225
+ "max": 0.0
226
+ },
227
+ {
228
+ "current": 2649.998,
229
+ "min": 0.0,
230
+ "max": 0.0
231
+ },
232
+ {
233
+ "current": 2649.998,
234
+ "min": 0.0,
235
+ "max": 0.0
236
+ },
237
+ {
238
+ "current": 2649.998,
239
+ "min": 0.0,
240
+ "max": 0.0
241
+ },
242
+ {
243
+ "current": 2649.998,
244
+ "min": 0.0,
245
+ "max": 0.0
246
+ },
247
+ {
248
+ "current": 2649.998,
249
+ "min": 0.0,
250
+ "max": 0.0
251
+ },
252
+ {
253
+ "current": 2649.998,
254
+ "min": 0.0,
255
+ "max": 0.0
256
+ },
257
+ {
258
+ "current": 2649.998,
259
+ "min": 0.0,
260
+ "max": 0.0
261
+ },
262
+ {
263
+ "current": 2649.998,
264
+ "min": 0.0,
265
+ "max": 0.0
266
+ },
267
+ {
268
+ "current": 2649.998,
269
+ "min": 0.0,
270
+ "max": 0.0
271
+ },
272
+ {
273
+ "current": 2649.998,
274
+ "min": 0.0,
275
+ "max": 0.0
276
+ },
277
+ {
278
+ "current": 2649.998,
279
+ "min": 0.0,
280
+ "max": 0.0
281
+ },
282
+ {
283
+ "current": 2649.998,
284
+ "min": 0.0,
285
+ "max": 0.0
286
+ },
287
+ {
288
+ "current": 3597.872,
289
+ "min": 0.0,
290
+ "max": 0.0
291
+ },
292
+ {
293
+ "current": 2649.998,
294
+ "min": 0.0,
295
+ "max": 0.0
296
+ },
297
+ {
298
+ "current": 2649.998,
299
+ "min": 0.0,
300
+ "max": 0.0
301
+ },
302
+ {
303
+ "current": 2649.998,
304
+ "min": 0.0,
305
+ "max": 0.0
306
+ },
307
+ {
308
+ "current": 2649.998,
309
+ "min": 0.0,
310
+ "max": 0.0
311
+ },
312
+ {
313
+ "current": 2649.998,
314
+ "min": 0.0,
315
+ "max": 0.0
316
+ },
317
+ {
318
+ "current": 2649.998,
319
+ "min": 0.0,
320
+ "max": 0.0
321
+ },
322
+ {
323
+ "current": 2649.998,
324
+ "min": 0.0,
325
+ "max": 0.0
326
+ },
327
+ {
328
+ "current": 2649.998,
329
+ "min": 0.0,
330
+ "max": 0.0
331
+ },
332
+ {
333
+ "current": 2649.998,
334
+ "min": 0.0,
335
+ "max": 0.0
336
+ },
337
+ {
338
+ "current": 2649.998,
339
+ "min": 0.0,
340
+ "max": 0.0
341
+ },
342
+ {
343
+ "current": 2649.998,
344
+ "min": 0.0,
345
+ "max": 0.0
346
+ },
347
+ {
348
+ "current": 3590.698,
349
+ "min": 0.0,
350
+ "max": 0.0
351
+ },
352
+ {
353
+ "current": 2649.998,
354
+ "min": 0.0,
355
+ "max": 0.0
356
+ },
357
+ {
358
+ "current": 2649.998,
359
+ "min": 0.0,
360
+ "max": 0.0
361
+ },
362
+ {
363
+ "current": 2649.998,
364
+ "min": 0.0,
365
+ "max": 0.0
366
+ },
367
+ {
368
+ "current": 2688.441,
369
+ "min": 0.0,
370
+ "max": 0.0
371
+ },
372
+ {
373
+ "current": 2649.998,
374
+ "min": 0.0,
375
+ "max": 0.0
376
+ },
377
+ {
378
+ "current": 2649.998,
379
+ "min": 0.0,
380
+ "max": 0.0
381
+ },
382
+ {
383
+ "current": 2649.998,
384
+ "min": 0.0,
385
+ "max": 0.0
386
+ },
387
+ {
388
+ "current": 2649.998,
389
+ "min": 0.0,
390
+ "max": 0.0
391
+ },
392
+ {
393
+ "current": 2649.998,
394
+ "min": 0.0,
395
+ "max": 0.0
396
+ },
397
+ {
398
+ "current": 2649.998,
399
+ "min": 0.0,
400
+ "max": 0.0
401
+ },
402
+ {
403
+ "current": 2649.998,
404
+ "min": 0.0,
405
+ "max": 0.0
406
+ },
407
+ {
408
+ "current": 2649.998,
409
+ "min": 0.0,
410
+ "max": 0.0
411
+ },
412
+ {
413
+ "current": 2649.998,
414
+ "min": 0.0,
415
+ "max": 0.0
416
+ },
417
+ {
418
+ "current": 2649.998,
419
+ "min": 0.0,
420
+ "max": 0.0
421
+ },
422
+ {
423
+ "current": 2649.998,
424
+ "min": 0.0,
425
+ "max": 0.0
426
+ },
427
+ {
428
+ "current": 3582.227,
429
+ "min": 0.0,
430
+ "max": 0.0
431
+ },
432
+ {
433
+ "current": 2649.998,
434
+ "min": 0.0,
435
+ "max": 0.0
436
+ },
437
+ {
438
+ "current": 3596.99,
439
+ "min": 0.0,
440
+ "max": 0.0
441
+ },
442
+ {
443
+ "current": 2649.998,
444
+ "min": 0.0,
445
+ "max": 0.0
446
+ },
447
+ {
448
+ "current": 2649.998,
449
+ "min": 0.0,
450
+ "max": 0.0
451
+ },
452
+ {
453
+ "current": 2649.998,
454
+ "min": 0.0,
455
+ "max": 0.0
456
+ },
457
+ {
458
+ "current": 2649.998,
459
+ "min": 0.0,
460
+ "max": 0.0
461
+ },
462
+ {
463
+ "current": 2649.998,
464
+ "min": 0.0,
465
+ "max": 0.0
466
+ },
467
+ {
468
+ "current": 2649.998,
469
+ "min": 0.0,
470
+ "max": 0.0
471
+ },
472
+ {
473
+ "current": 2649.998,
474
+ "min": 0.0,
475
+ "max": 0.0
476
+ },
477
+ {
478
+ "current": 2649.998,
479
+ "min": 0.0,
480
+ "max": 0.0
481
+ },
482
+ {
483
+ "current": 3597.34,
484
+ "min": 0.0,
485
+ "max": 0.0
486
+ },
487
+ {
488
+ "current": 2649.998,
489
+ "min": 0.0,
490
+ "max": 0.0
491
+ },
492
+ {
493
+ "current": 2649.998,
494
+ "min": 0.0,
495
+ "max": 0.0
496
+ },
497
+ {
498
+ "current": 2649.998,
499
+ "min": 0.0,
500
+ "max": 0.0
501
+ },
502
+ {
503
+ "current": 2649.998,
504
+ "min": 0.0,
505
+ "max": 0.0
506
+ },
507
+ {
508
+ "current": 2649.998,
509
+ "min": 0.0,
510
+ "max": 0.0
511
+ }
512
+ ],
513
+ "disk": {
514
+ "/": {
515
+ "total": 290.7472343444824,
516
+ "used": 58.58917236328125
517
+ }
518
+ },
519
+ "gpu": "NVIDIA H100 80GB HBM3",
520
+ "gpu_count": 8,
521
+ "gpu_devices": [
522
+ {
523
+ "name": "NVIDIA H100 80GB HBM3",
524
+ "memory_total": 85520809984
525
+ },
526
+ {
527
+ "name": "NVIDIA H100 80GB HBM3",
528
+ "memory_total": 85520809984
529
+ },
530
+ {
531
+ "name": "NVIDIA H100 80GB HBM3",
532
+ "memory_total": 85520809984
533
+ },
534
+ {
535
+ "name": "NVIDIA H100 80GB HBM3",
536
+ "memory_total": 85520809984
537
+ },
538
+ {
539
+ "name": "NVIDIA H100 80GB HBM3",
540
+ "memory_total": 85520809984
541
+ },
542
+ {
543
+ "name": "NVIDIA H100 80GB HBM3",
544
+ "memory_total": 85520809984
545
+ },
546
+ {
547
+ "name": "NVIDIA H100 80GB HBM3",
548
+ "memory_total": 85520809984
549
+ },
550
+ {
551
+ "name": "NVIDIA H100 80GB HBM3",
552
+ "memory_total": 85520809984
553
+ }
554
+ ],
555
+ "memory": {
556
+ "total": 1999.9855155944824
557
+ }
558
+ }
wandb/run-20240425_234916-ozdw63qu/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train_runtime": 9.0702, "train_samples_per_second": 564484.818, "train_steps_per_second": 2205.019, "total_flos": 9.058565045825516e+19, "train_loss": 4.2969823139893115e-05, "train/epoch": 4.944622991347343, "train/global_step": 20001, "_timestamp": 1714088965.7540886, "_runtime": 9.237897634506226, "_step": 1, "eval/loss": 1.0042184591293335, "eval/runtime": 1.4747, "eval/samples_per_second": 433.323, "eval/steps_per_second": 2.034}
wandb/run-20240425_234916-ozdw63qu/logs/debug-internal.log ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-04-25 23:49:16,511 INFO StreamThr :212584 [internal.py:wandb_internal():86] W&B internal server running at pid: 212584, started at: 2024-04-25 23:49:16.510614
2
+ 2024-04-25 23:49:16,512 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: status
3
+ 2024-04-25 23:49:16,519 INFO WriterThread:212584 [datastore.py:open_for_write():87] open: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/run-ozdw63qu.wandb
4
+ 2024-04-25 23:49:16,520 DEBUG SenderThread:212584 [sender.py:send():379] send: header
5
+ 2024-04-25 23:49:16,537 DEBUG SenderThread:212584 [sender.py:send():379] send: run
6
+ 2024-04-25 23:49:16,772 INFO SenderThread:212584 [dir_watcher.py:__init__():211] watching files in: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files
7
+ 2024-04-25 23:49:16,772 INFO SenderThread:212584 [sender.py:_start_run_threads():1124] run started: ozdw63qu with start time 1714088956.516191
8
+ 2024-04-25 23:49:16,778 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: check_version
9
+ 2024-04-25 23:49:16,778 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: check_version
10
+ 2024-04-25 23:49:16,833 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: run_start
11
+ 2024-04-25 23:49:16,892 DEBUG HandlerThread:212584 [system_info.py:__init__():26] System info init
12
+ 2024-04-25 23:49:16,892 DEBUG HandlerThread:212584 [system_info.py:__init__():41] System info init done
13
+ 2024-04-25 23:49:16,892 INFO HandlerThread:212584 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-04-25 23:49:16,893 INFO SystemMonitor:212584 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-04-25 23:49:16,893 INFO HandlerThread:212584 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-04-25 23:49:16,893 INFO SystemMonitor:212584 [interfaces.py:start():190] Started cpu monitoring
17
+ 2024-04-25 23:49:16,893 INFO SystemMonitor:212584 [interfaces.py:start():190] Started disk monitoring
18
+ 2024-04-25 23:49:16,894 INFO SystemMonitor:212584 [interfaces.py:start():190] Started gpu monitoring
19
+ 2024-04-25 23:49:16,894 INFO SystemMonitor:212584 [interfaces.py:start():190] Started memory monitoring
20
+ 2024-04-25 23:49:16,895 INFO SystemMonitor:212584 [interfaces.py:start():190] Started network monitoring
21
+ 2024-04-25 23:49:16,939 DEBUG HandlerThread:212584 [system_info.py:probe():150] Probing system
22
+ 2024-04-25 23:49:16,942 DEBUG HandlerThread:212584 [system_info.py:_probe_git():135] Probing git
23
+ 2024-04-25 23:49:16,961 DEBUG HandlerThread:212584 [system_info.py:_probe_git():143] Probing git done
24
+ 2024-04-25 23:49:16,961 DEBUG HandlerThread:212584 [system_info.py:probe():198] Probing system done
25
+ 2024-04-25 23:49:16,961 DEBUG HandlerThread:212584 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-1048-aws-x86_64-with-glibc2.31', 'python': '3.11.9', 'heartbeatAt': '2024-04-25T23:49:16.939535', 'startedAt': '2024-04-25T23:49:16.496864', 'docker': None, 'cuda': None, 'args': ('./config_full.yaml',), 'state': 'running', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py', 'codePathLocal': 'run_sft.py', 'codePath': 'run_sft.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft-ultrachat', 'commit': 'cbea69c6b95c970317a1e47c3f614b55b33f8ed9'}, 'email': None, 'root': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat', 'host': 'ip-26-0-167-177', 'username': 'sanchit', 'executable': '/fsx/sanchit/miniconda3/envs/alignment/bin/python', 'cpu_count': 96, 'cpu_count_logical': 96, 'cpu_freq': {'current': 2718.7540416666648, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2603.523, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3596.748, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3596.723, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3597.872, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3590.698, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2688.441, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3582.227, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3596.99, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 3597.34, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}, {'current': 2649.998, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 290.7472343444824, 'used': 58.58917236328125}}, 'gpu': 'NVIDIA H100 80GB HBM3', 'gpu_count': 8, 'gpu_devices': [{'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}], 'memory': {'total': 1999.9855155944824}}
26
+ 2024-04-25 23:49:16,962 INFO HandlerThread:212584 [system_monitor.py:probe():224] Finished collecting system info
27
+ 2024-04-25 23:49:16,962 INFO HandlerThread:212584 [system_monitor.py:probe():227] Publishing system info
28
+ 2024-04-25 23:49:16,962 DEBUG HandlerThread:212584 [system_info.py:_save_conda():207] Saving list of conda packages installed into the current environment
29
+ 2024-04-25 23:49:17,774 INFO Thread-12 :212584 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/conda-environment.yaml
30
+ 2024-04-25 23:49:20,884 DEBUG HandlerThread:212584 [system_info.py:_save_conda():222] Saving conda packages done
31
+ 2024-04-25 23:49:20,887 INFO HandlerThread:212584 [system_monitor.py:probe():229] Finished publishing system info
32
+ 2024-04-25 23:49:20,915 DEBUG SenderThread:212584 [sender.py:send():379] send: files
33
+ 2024-04-25 23:49:20,915 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-metadata.json with policy now
34
+ 2024-04-25 23:49:21,061 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: python_packages
35
+ 2024-04-25 23:49:21,061 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: python_packages
36
+ 2024-04-25 23:49:21,061 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: internal_messages
37
+ 2024-04-25 23:49:21,062 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: stop_status
38
+ 2024-04-25 23:49:21,063 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: stop_status
39
+ 2024-04-25 23:49:21,184 INFO wandb-upload_0:212584 [upload_job.py:push():131] Uploaded file /tmp/tmp9wuipw18wandb/r4fodxap-wandb-metadata.json
40
+ 2024-04-25 23:49:21,185 DEBUG SenderThread:212584 [sender.py:send():379] send: telemetry
41
+ 2024-04-25 23:49:21,185 DEBUG SenderThread:212584 [sender.py:send():379] send: config
42
+ 2024-04-25 23:49:21,187 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
43
+ 2024-04-25 23:49:21,188 DEBUG SenderThread:212584 [sender.py:send():379] send: telemetry
44
+ 2024-04-25 23:49:21,188 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
45
+ 2024-04-25 23:49:21,188 WARNING SenderThread:212584 [sender.py:send_metric():1341] Seen metric with glob (shouldn't happen)
46
+ 2024-04-25 23:49:21,188 DEBUG SenderThread:212584 [sender.py:send():379] send: telemetry
47
+ 2024-04-25 23:49:21,778 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/conda-environment.yaml
48
+ 2024-04-25 23:49:21,778 INFO Thread-12 :212584 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/wandb-metadata.json
49
+ 2024-04-25 23:49:21,778 INFO Thread-12 :212584 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/requirements.txt
50
+ 2024-04-25 23:49:21,778 INFO Thread-12 :212584 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
51
+ 2024-04-25 23:49:22,189 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: status_report
52
+ 2024-04-25 23:49:23,780 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
53
+ 2024-04-25 23:49:24,232 DEBUG SenderThread:212584 [sender.py:send():379] send: telemetry
54
+ 2024-04-25 23:49:24,233 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
55
+ 2024-04-25 23:49:24,234 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: partial_history
56
+ 2024-04-25 23:49:24,236 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
57
+ 2024-04-25 23:49:24,237 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
58
+ 2024-04-25 23:49:24,238 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
59
+ 2024-04-25 23:49:24,238 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
60
+ 2024-04-25 23:49:24,239 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
61
+ 2024-04-25 23:49:24,239 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
62
+ 2024-04-25 23:49:24,240 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
63
+ 2024-04-25 23:49:24,240 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
64
+ 2024-04-25 23:49:24,242 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
65
+ 2024-04-25 23:49:24,242 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
66
+ 2024-04-25 23:49:24,242 DEBUG SenderThread:212584 [sender.py:send():379] send: history
67
+ 2024-04-25 23:49:24,242 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
68
+ 2024-04-25 23:49:24,243 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
69
+ 2024-04-25 23:49:24,782 INFO Thread-12 :212584 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/wandb-summary.json
70
+ 2024-04-25 23:49:25,754 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: partial_history
71
+ 2024-04-25 23:49:25,757 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
72
+ 2024-04-25 23:49:25,758 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
73
+ 2024-04-25 23:49:25,758 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
74
+ 2024-04-25 23:49:25,758 DEBUG SenderThread:212584 [sender.py:send():379] send: metric
75
+ 2024-04-25 23:49:25,758 DEBUG SenderThread:212584 [sender.py:send():379] send: history
76
+ 2024-04-25 23:49:25,758 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: summary_record
77
+ 2024-04-25 23:49:25,760 INFO SenderThread:212584 [sender.py:_save_file():1390] saving file wandb-summary.json with policy end
78
+ 2024-04-25 23:49:25,783 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/wandb-summary.json
79
+ 2024-04-25 23:49:25,784 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
80
+ 2024-04-25 23:49:26,785 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
81
+ 2024-04-25 23:49:27,764 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: status_report
82
+ 2024-04-25 23:49:27,787 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
83
+ 2024-04-25 23:49:31,791 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
84
+ 2024-04-25 23:49:33,688 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: status_report
85
+ 2024-04-25 23:49:36,062 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: internal_messages
86
+ 2024-04-25 23:49:36,063 DEBUG HandlerThread:212584 [handler.py:handle_request():146] handle_request: stop_status
87
+ 2024-04-25 23:49:36,063 DEBUG SenderThread:212584 [sender.py:send_request():406] send_request: stop_status
88
+ 2024-04-25 23:49:37,798 INFO Thread-12 :212584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/files/output.log
wandb/run-20240425_234916-ozdw63qu/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
2
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Configure stats pid to 211869
3
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/settings
5
+ 2024-04-25 23:49:16,505 INFO MainThread:211869 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py'}
8
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:_log_setup():521] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/logs/debug.log
10
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:_log_setup():522] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_234916-ozdw63qu/logs/debug-internal.log
11
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():561] calling init triggers
12
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():611] starting backend
15
+ 2024-04-25 23:49:16,506 INFO MainThread:211869 [wandb_init.py:init():615] setting up manager
16
+ 2024-04-25 23:49:16,508 INFO MainThread:211869 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-04-25 23:49:16,515 INFO MainThread:211869 [wandb_init.py:init():623] backend started and connected
18
+ 2024-04-25 23:49:16,518 INFO MainThread:211869 [wandb_init.py:init():715] updated telemetry
19
+ 2024-04-25 23:49:16,536 INFO MainThread:211869 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
20
+ 2024-04-25 23:49:16,777 INFO MainThread:211869 [wandb_run.py:_on_init():2357] communicating current version
21
+ 2024-04-25 23:49:16,826 INFO MainThread:211869 [wandb_run.py:_on_init():2366] got version response
22
+ 2024-04-25 23:49:16,826 INFO MainThread:211869 [wandb_init.py:init():799] starting run threads in backend
23
+ 2024-04-25 23:49:21,061 INFO MainThread:211869 [wandb_run.py:_console_start():2335] atexit reg
24
+ 2024-04-25 23:49:21,061 INFO MainThread:211869 [wandb_run.py:_redirect():2190] redirect: wrap_raw
25
+ 2024-04-25 23:49:21,061 INFO MainThread:211869 [wandb_run.py:_redirect():2255] Wrapping output streams.
26
+ 2024-04-25 23:49:21,062 INFO MainThread:211869 [wandb_run.py:_redirect():2280] Redirects installed.
27
+ 2024-04-25 23:49:21,063 INFO MainThread:211869 [wandb_init.py:init():842] run started, returning control to user process
28
+ 2024-04-25 23:49:21,064 INFO MainThread:211869 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_23-48-22_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
wandb/run-20240425_234916-ozdw63qu/run-ozdw63qu.wandb ADDED
File without changes