sanchit-gandhi HF staff committed on
Commit
61336ff
1 Parent(s): ebc6ad5

Training in progress, step 5000

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/run-20240425_134518-etajcxpg/run-etajcxpg.wandb filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -20,7 +20,7 @@
20
  "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
- "transformers_version": "4.40.0.dev0",
24
  "use_cache": false,
25
  "vocab_size": 32000
26
  }
 
20
  "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.40.1",
24
  "use_cache": false,
25
  "vocab_size": 32000
26
  }
config_full.yaml CHANGED
@@ -19,7 +19,8 @@ do_eval: true
19
  evaluation_strategy: steps
20
  eval_steps: 5000
21
  save_strategy: "steps"
22
- save_total_limit: 5000
 
23
  gradient_accumulation_steps: 1
24
  gradient_checkpointing: true
25
  gradient_checkpointing_kwargs:
 
19
  evaluation_strategy: steps
20
  eval_steps: 5000
21
  save_strategy: "steps"
22
+ save_steps: 5000
23
+ save_total_limit: 1
24
  gradient_accumulation_steps: 1
25
  gradient_checkpointing: true
26
  gradient_checkpointing_kwargs:
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a7667b4f7fb01bc0f00d3a07e35846f784c9d9bf8171e61c789861b33c4987f
3
  size 3141646744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4120117d250ccbc68a45f04c769a8427711032a5093688f5e6017217743a6bc2
3
  size 3141646744
runs/Apr25_13-44-28_ip-26-0-167-177/events.out.tfevents.1714052716.ip-26-0-167-177.156194.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf71866770393a20788ee13f1fefaf1e41ffc168f00346f69ee3231932150974
3
+ size 47364
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1464ab5521091ef44c1647b6866ecc70515e4a2469ed5b7ed407275c3c551c0d
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781c0a2347f9f34d96f08a02d9262669403d18a6358c936df9c6d50431430243
3
  size 4984
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log CHANGED
@@ -1,28 +1,28 @@
1
- 2024-04-24 16:43:24,533 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Current SDK version is 0.16.1
2
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Configure stats pid to 1854033
3
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/settings
5
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py'}
8
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_init.py:_log_setup():524] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240424_164324-xfbnm7qo/logs/debug.log
9
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_init.py:_log_setup():525] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240424_164324-xfbnm7qo/logs/debug-internal.log
10
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_init.py:init():564] calling init triggers
11
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
 
12
  config: {}
13
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_init.py:init():614] starting backend
14
- 2024-04-24 16:43:24,534 INFO MainThread:1854033 [wandb_init.py:init():618] setting up manager
15
- 2024-04-24 16:43:24,537 INFO MainThread:1854033 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
- 2024-04-24 16:43:24,541 INFO MainThread:1854033 [wandb_init.py:init():624] backend started and connected
17
- 2024-04-24 16:43:24,544 INFO MainThread:1854033 [wandb_init.py:init():716] updated telemetry
18
- 2024-04-24 16:43:24,569 INFO MainThread:1854033 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
19
- 2024-04-24 16:43:24,850 INFO MainThread:1854033 [wandb_run.py:_on_init():2254] communicating current version
20
- 2024-04-24 16:43:24,896 INFO MainThread:1854033 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.6 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
21
-
22
- 2024-04-24 16:43:24,896 INFO MainThread:1854033 [wandb_init.py:init():800] starting run threads in backend
23
- 2024-04-24 16:43:30,532 INFO MainThread:1854033 [wandb_run.py:_console_start():2233] atexit reg
24
- 2024-04-24 16:43:30,532 INFO MainThread:1854033 [wandb_run.py:_redirect():2088] redirect: wrap_raw
25
- 2024-04-24 16:43:30,532 INFO MainThread:1854033 [wandb_run.py:_redirect():2153] Wrapping output streams.
26
- 2024-04-24 16:43:30,532 INFO MainThread:1854033 [wandb_run.py:_redirect():2178] Redirects installed.
27
- 2024-04-24 16:43:30,533 INFO MainThread:1854033 [wandb_init.py:init():841] run started, returning control to user process
28
- 2024-04-24 16:43:30,535 INFO MainThread:1854033 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.0.dev0', 'model_type': 'mistral', 'output_dir': './', 
'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr24_16-42-31_ip-26-0-162-233', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': 5000, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': 
False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
 
1
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
2
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Configure stats pid to 156194
3
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/settings
5
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py'}
8
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:_log_setup():521] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/logs/debug.log
10
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:_log_setup():522] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/logs/debug-internal.log
11
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():561] calling init triggers
12
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
13
  config: {}
14
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():611] starting backend
15
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():615] setting up manager
16
+ 2024-04-25 13:45:18,231 INFO MainThread:156194 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-04-25 13:45:18,239 INFO MainThread:156194 [wandb_init.py:init():623] backend started and connected
18
+ 2024-04-25 13:45:18,241 INFO MainThread:156194 [wandb_init.py:init():715] updated telemetry
19
+ 2024-04-25 13:45:18,267 INFO MainThread:156194 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
20
+ 2024-04-25 13:45:18,506 INFO MainThread:156194 [wandb_run.py:_on_init():2357] communicating current version
21
+ 2024-04-25 13:45:18,558 INFO MainThread:156194 [wandb_run.py:_on_init():2366] got version response
22
+ 2024-04-25 13:45:18,558 INFO MainThread:156194 [wandb_init.py:init():799] starting run threads in backend
23
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_console_start():2335] atexit reg
24
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2190] redirect: wrap_raw
25
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2255] Wrapping output streams.
26
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2280] Redirects installed.
27
+ 2024-04-25 13:45:22,709 INFO MainThread:156194 [wandb_init.py:init():842] run started, returning control to user process
28
+ 2024-04-25 13:45:22,711 INFO MainThread:156194 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': 
True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_13-44-28_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 
'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
 
wandb/run-20240425_134518-etajcxpg/files/conda-environment.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: alignment
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - _libgcc_mutex=0.1=main
6
+ - _openmp_mutex=5.1=1_gnu
7
+ - bzip2=1.0.8=h5eee18b_5
8
+ - ca-certificates=2024.3.11=h06a4308_0
9
+ - ld_impl_linux-64=2.38=h1181459_1
10
+ - libffi=3.4.4=h6a678d5_0
11
+ - libgcc-ng=11.2.0=h1234567_1
12
+ - libgomp=11.2.0=h1234567_1
13
+ - libstdcxx-ng=11.2.0=h1234567_1
14
+ - libuuid=1.41.5=h5eee18b_0
15
+ - ncurses=6.4=h6a678d5_0
16
+ - openssl=3.0.13=h7f8727e_0
17
+ - pip=23.3.1=py311h06a4308_0
18
+ - python=3.11.9=h955ad1f_0
19
+ - readline=8.2=h5eee18b_0
20
+ - setuptools=68.2.2=py311h06a4308_0
21
+ - sqlite=3.41.2=h5eee18b_0
22
+ - tk=8.6.12=h1ccaba5_0
23
+ - wheel=0.41.2=py311h06a4308_0
24
+ - xz=5.4.6=h5eee18b_0
25
+ - zlib=1.2.13=h5eee18b_0
26
+ - pip:
27
+ - absl-py==2.1.0
28
+ - accelerate==0.29.3
29
+ - aiohttp==3.9.5
30
+ - aiosignal==1.3.1
31
+ - annotated-types==0.6.0
32
+ - appdirs==1.4.4
33
+ - attrs==23.2.0
34
+ - bitsandbytes==0.43.1
35
+ - certifi==2024.2.2
36
+ - charset-normalizer==3.3.2
37
+ - click==8.1.7
38
+ - datasets==2.19.0
39
+ - deepspeed==0.14.2
40
+ - dill==0.3.8
41
+ - docker-pycreds==0.4.0
42
+ - docstring-parser==0.16
43
+ - einops==0.7.0
44
+ - evaluate==0.4.1
45
+ - filelock==3.13.4
46
+ - frozenlist==1.4.1
47
+ - fsspec==2024.3.1
48
+ - gitdb==4.0.11
49
+ - gitpython==3.1.43
50
+ - grpcio==1.62.2
51
+ - hf-transfer==0.1.6
52
+ - hjson==3.1.0
53
+ - huggingface-hub==0.22.2
54
+ - idna==3.7
55
+ - jinja2==3.1.3
56
+ - markdown==3.6
57
+ - markdown-it-py==3.0.0
58
+ - markupsafe==2.1.5
59
+ - mdurl==0.1.2
60
+ - mpmath==1.3.0
61
+ - multidict==6.0.5
62
+ - multiprocess==0.70.16
63
+ - networkx==3.3
64
+ - ninja==1.11.1.1
65
+ - numpy==1.26.4
66
+ - nvidia-cublas-cu12==12.1.3.1
67
+ - nvidia-cuda-cupti-cu12==12.1.105
68
+ - nvidia-cuda-nvrtc-cu12==12.1.105
69
+ - nvidia-cuda-runtime-cu12==12.1.105
70
+ - nvidia-cudnn-cu12==8.9.2.26
71
+ - nvidia-cufft-cu12==11.0.2.54
72
+ - nvidia-curand-cu12==10.3.2.106
73
+ - nvidia-cusolver-cu12==11.4.5.107
74
+ - nvidia-cusparse-cu12==12.1.0.106
75
+ - nvidia-nccl-cu12==2.19.3
76
+ - nvidia-nvjitlink-cu12==12.4.127
77
+ - nvidia-nvtx-cu12==12.1.105
78
+ - packaging==24.0
79
+ - pandas==2.2.2
80
+ - peft==0.10.0
81
+ - pillow==10.3.0
82
+ - protobuf==3.20.2
83
+ - psutil==5.9.8
84
+ - py-cpuinfo==9.0.0
85
+ - pyarrow==16.0.0
86
+ - pyarrow-hotfix==0.6
87
+ - pydantic==2.7.1
88
+ - pydantic-core==2.18.2
89
+ - pygments==2.17.2
90
+ - pynvml==11.5.0
91
+ - python-dateutil==2.9.0.post0
92
+ - pytz==2024.1
93
+ - pyyaml==6.0.1
94
+ - regex==2024.4.16
95
+ - requests==2.31.0
96
+ - responses==0.18.0
97
+ - rich==13.7.1
98
+ - safetensors==0.4.3
99
+ - scipy==1.13.0
100
+ - sentencepiece==0.2.0
101
+ - sentry-sdk==2.0.0
102
+ - setproctitle==1.3.3
103
+ - shtab==1.7.1
104
+ - six==1.16.0
105
+ - smmap==5.0.1
106
+ - sympy==1.12
107
+ - tensorboard==2.16.2
108
+ - tensorboard-data-server==0.7.2
109
+ - tokenizers==0.19.1
110
+ - torch==2.2.2
111
+ - torchaudio==2.2.2
112
+ - torchvision==0.17.2
113
+ - tqdm==4.66.2
114
+ - transformers==4.40.1
115
+ - triton==2.2.0
116
+ - trl==0.8.6
117
+ - typing-extensions==4.11.0
118
+ - tyro==0.8.3
119
+ - tzdata==2024.1
120
+ - urllib3==2.2.1
121
+ - wandb==0.16.6
122
+ - werkzeug==3.0.2
123
+ - xxhash==3.4.1
124
+ - yarl==1.9.4
125
+ prefix: /fsx/sanchit/miniconda3/envs/alignment
wandb/run-20240425_134518-etajcxpg/files/config.yaml ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.11.9
7
+ cli_version: 0.16.6
8
+ framework: huggingface
9
+ huggingface_version: 4.40.1
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1714052718.0
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 11
17
+ - 49
18
+ - 51
19
+ - 55
20
+ - 71
21
+ - 84
22
+ - 98
23
+ 2:
24
+ - 1
25
+ - 11
26
+ - 49
27
+ - 51
28
+ - 55
29
+ - 71
30
+ - 84
31
+ - 98
32
+ 3:
33
+ - 7
34
+ - 23
35
+ 4: 3.11.9
36
+ 5: 0.16.6
37
+ 6: 4.40.1
38
+ 8:
39
+ - 5
40
+ 9:
41
+ 1: transformers_trainer
42
+ 13: linux-x86_64
43
+ m:
44
+ - 1: train/global_step
45
+ 6:
46
+ - 3
47
+ - 1: train/loss
48
+ 5: 1
49
+ 6:
50
+ - 1
51
+ - 1: train/grad_norm
52
+ 5: 1
53
+ 6:
54
+ - 1
55
+ - 1: train/learning_rate
56
+ 5: 1
57
+ 6:
58
+ - 1
59
+ - 1: train/epoch
60
+ 5: 1
61
+ 6:
62
+ - 1
63
+ - 1: eval/loss
64
+ 5: 1
65
+ 6:
66
+ - 1
67
+ - 1: eval/runtime
68
+ 5: 1
69
+ 6:
70
+ - 1
71
+ - 1: eval/samples_per_second
72
+ 5: 1
73
+ 6:
74
+ - 1
75
+ - 1: eval/steps_per_second
76
+ 5: 1
77
+ 6:
78
+ - 1
79
+ vocab_size:
80
+ desc: null
81
+ value: 32000
82
+ max_position_embeddings:
83
+ desc: null
84
+ value: 32768
85
+ hidden_size:
86
+ desc: null
87
+ value: 4096
88
+ intermediate_size:
89
+ desc: null
90
+ value: 14336
91
+ num_hidden_layers:
92
+ desc: null
93
+ value: 6
94
+ num_attention_heads:
95
+ desc: null
96
+ value: 32
97
+ sliding_window:
98
+ desc: null
99
+ value: 4096
100
+ num_key_value_heads:
101
+ desc: null
102
+ value: 8
103
+ hidden_act:
104
+ desc: null
105
+ value: silu
106
+ initializer_range:
107
+ desc: null
108
+ value: 0.02
109
+ rms_norm_eps:
110
+ desc: null
111
+ value: 1.0e-05
112
+ use_cache:
113
+ desc: null
114
+ value: false
115
+ rope_theta:
116
+ desc: null
117
+ value: 10000.0
118
+ attention_dropout:
119
+ desc: null
120
+ value: 0.0
121
+ return_dict:
122
+ desc: null
123
+ value: true
124
+ output_hidden_states:
125
+ desc: null
126
+ value: false
127
+ output_attentions:
128
+ desc: null
129
+ value: false
130
+ torchscript:
131
+ desc: null
132
+ value: false
133
+ torch_dtype:
134
+ desc: null
135
+ value: bfloat16
136
+ use_bfloat16:
137
+ desc: null
138
+ value: false
139
+ tf_legacy_loss:
140
+ desc: null
141
+ value: false
142
+ pruned_heads:
143
+ desc: null
144
+ value: {}
145
+ tie_word_embeddings:
146
+ desc: null
147
+ value: false
148
+ chunk_size_feed_forward:
149
+ desc: null
150
+ value: 0
151
+ is_encoder_decoder:
152
+ desc: null
153
+ value: false
154
+ is_decoder:
155
+ desc: null
156
+ value: false
157
+ cross_attention_hidden_size:
158
+ desc: null
159
+ value: null
160
+ add_cross_attention:
161
+ desc: null
162
+ value: false
163
+ tie_encoder_decoder:
164
+ desc: null
165
+ value: false
166
+ max_length:
167
+ desc: null
168
+ value: 20
169
+ min_length:
170
+ desc: null
171
+ value: 0
172
+ do_sample:
173
+ desc: null
174
+ value: false
175
+ early_stopping:
176
+ desc: null
177
+ value: false
178
+ num_beams:
179
+ desc: null
180
+ value: 1
181
+ num_beam_groups:
182
+ desc: null
183
+ value: 1
184
+ diversity_penalty:
185
+ desc: null
186
+ value: 0.0
187
+ temperature:
188
+ desc: null
189
+ value: 1.0
190
+ top_k:
191
+ desc: null
192
+ value: 50
193
+ top_p:
194
+ desc: null
195
+ value: 1.0
196
+ typical_p:
197
+ desc: null
198
+ value: 1.0
199
+ repetition_penalty:
200
+ desc: null
201
+ value: 1.0
202
+ length_penalty:
203
+ desc: null
204
+ value: 1.0
205
+ no_repeat_ngram_size:
206
+ desc: null
207
+ value: 0
208
+ encoder_no_repeat_ngram_size:
209
+ desc: null
210
+ value: 0
211
+ bad_words_ids:
212
+ desc: null
213
+ value: null
214
+ num_return_sequences:
215
+ desc: null
216
+ value: 1
217
+ output_scores:
218
+ desc: null
219
+ value: false
220
+ return_dict_in_generate:
221
+ desc: null
222
+ value: false
223
+ forced_bos_token_id:
224
+ desc: null
225
+ value: null
226
+ forced_eos_token_id:
227
+ desc: null
228
+ value: null
229
+ remove_invalid_values:
230
+ desc: null
231
+ value: false
232
+ exponential_decay_length_penalty:
233
+ desc: null
234
+ value: null
235
+ suppress_tokens:
236
+ desc: null
237
+ value: null
238
+ begin_suppress_tokens:
239
+ desc: null
240
+ value: null
241
+ architectures:
242
+ desc: null
243
+ value:
244
+ - MistralForCausalLM
245
+ finetuning_task:
246
+ desc: null
247
+ value: null
248
+ id2label:
249
+ desc: null
250
+ value:
251
+ '0': LABEL_0
252
+ '1': LABEL_1
253
+ label2id:
254
+ desc: null
255
+ value:
256
+ LABEL_0: 0
257
+ LABEL_1: 1
258
+ tokenizer_class:
259
+ desc: null
260
+ value: null
261
+ prefix:
262
+ desc: null
263
+ value: null
264
+ bos_token_id:
265
+ desc: null
266
+ value: 1
267
+ pad_token_id:
268
+ desc: null
269
+ value: null
270
+ eos_token_id:
271
+ desc: null
272
+ value: 2
273
+ sep_token_id:
274
+ desc: null
275
+ value: null
276
+ decoder_start_token_id:
277
+ desc: null
278
+ value: null
279
+ task_specific_params:
280
+ desc: null
281
+ value: null
282
+ problem_type:
283
+ desc: null
284
+ value: null
285
+ _name_or_path:
286
+ desc: null
287
+ value: sanchit-gandhi/Mistral-7B-v0.1-6-layer
288
+ transformers_version:
289
+ desc: null
290
+ value: 4.40.1
291
+ model_type:
292
+ desc: null
293
+ value: mistral
294
+ output_dir:
295
+ desc: null
296
+ value: ./
297
+ overwrite_output_dir:
298
+ desc: null
299
+ value: true
300
+ do_train:
301
+ desc: null
302
+ value: false
303
+ do_eval:
304
+ desc: null
305
+ value: true
306
+ do_predict:
307
+ desc: null
308
+ value: false
309
+ evaluation_strategy:
310
+ desc: null
311
+ value: steps
312
+ prediction_loss_only:
313
+ desc: null
314
+ value: false
315
+ per_device_train_batch_size:
316
+ desc: null
317
+ value: 32
318
+ per_device_eval_batch_size:
319
+ desc: null
320
+ value: 32
321
+ per_gpu_train_batch_size:
322
+ desc: null
323
+ value: null
324
+ per_gpu_eval_batch_size:
325
+ desc: null
326
+ value: null
327
+ gradient_accumulation_steps:
328
+ desc: null
329
+ value: 1
330
+ eval_accumulation_steps:
331
+ desc: null
332
+ value: null
333
+ eval_delay:
334
+ desc: null
335
+ value: 0
336
+ learning_rate:
337
+ desc: null
338
+ value: 0.0001
339
+ weight_decay:
340
+ desc: null
341
+ value: 0.0
342
+ adam_beta1:
343
+ desc: null
344
+ value: 0.9
345
+ adam_beta2:
346
+ desc: null
347
+ value: 0.999
348
+ adam_epsilon:
349
+ desc: null
350
+ value: 1.0e-08
351
+ max_grad_norm:
352
+ desc: null
353
+ value: 1.0
354
+ num_train_epochs:
355
+ desc: null
356
+ value: 3.0
357
+ max_steps:
358
+ desc: null
359
+ value: 20000
360
+ lr_scheduler_type:
361
+ desc: null
362
+ value: linear
363
+ lr_scheduler_kwargs:
364
+ desc: null
365
+ value: {}
366
+ warmup_ratio:
367
+ desc: null
368
+ value: 0.0
369
+ warmup_steps:
370
+ desc: null
371
+ value: 500
372
+ log_level:
373
+ desc: null
374
+ value: info
375
+ log_level_replica:
376
+ desc: null
377
+ value: warning
378
+ log_on_each_node:
379
+ desc: null
380
+ value: true
381
+ logging_dir:
382
+ desc: null
383
+ value: ./runs/Apr25_13-44-28_ip-26-0-167-177
384
+ logging_strategy:
385
+ desc: null
386
+ value: steps
387
+ logging_first_step:
388
+ desc: null
389
+ value: true
390
+ logging_steps:
391
+ desc: null
392
+ value: 25
393
+ logging_nan_inf_filter:
394
+ desc: null
395
+ value: true
396
+ save_strategy:
397
+ desc: null
398
+ value: steps
399
+ save_steps:
400
+ desc: null
401
+ value: 5000
402
+ save_total_limit:
403
+ desc: null
404
+ value: 1
405
+ save_safetensors:
406
+ desc: null
407
+ value: true
408
+ save_on_each_node:
409
+ desc: null
410
+ value: false
411
+ save_only_model:
412
+ desc: null
413
+ value: false
414
+ no_cuda:
415
+ desc: null
416
+ value: false
417
+ use_cpu:
418
+ desc: null
419
+ value: false
420
+ use_mps_device:
421
+ desc: null
422
+ value: false
423
+ seed:
424
+ desc: null
425
+ value: 42
426
+ data_seed:
427
+ desc: null
428
+ value: null
429
+ jit_mode_eval:
430
+ desc: null
431
+ value: false
432
+ use_ipex:
433
+ desc: null
434
+ value: false
435
+ bf16:
436
+ desc: null
437
+ value: true
438
+ fp16:
439
+ desc: null
440
+ value: false
441
+ fp16_opt_level:
442
+ desc: null
443
+ value: O1
444
+ half_precision_backend:
445
+ desc: null
446
+ value: auto
447
+ bf16_full_eval:
448
+ desc: null
449
+ value: false
450
+ fp16_full_eval:
451
+ desc: null
452
+ value: false
453
+ tf32:
454
+ desc: null
455
+ value: null
456
+ local_rank:
457
+ desc: null
458
+ value: 0
459
+ ddp_backend:
460
+ desc: null
461
+ value: null
462
+ tpu_num_cores:
463
+ desc: null
464
+ value: null
465
+ tpu_metrics_debug:
466
+ desc: null
467
+ value: false
468
+ debug:
469
+ desc: null
470
+ value: []
471
+ dataloader_drop_last:
472
+ desc: null
473
+ value: false
474
+ eval_steps:
475
+ desc: null
476
+ value: 5000
477
+ dataloader_num_workers:
478
+ desc: null
479
+ value: 0
480
+ dataloader_prefetch_factor:
481
+ desc: null
482
+ value: null
483
+ past_index:
484
+ desc: null
485
+ value: -1
486
+ run_name:
487
+ desc: null
488
+ value: ./
489
+ disable_tqdm:
490
+ desc: null
491
+ value: false
492
+ remove_unused_columns:
493
+ desc: null
494
+ value: true
495
+ label_names:
496
+ desc: null
497
+ value: null
498
+ load_best_model_at_end:
499
+ desc: null
500
+ value: false
501
+ metric_for_best_model:
502
+ desc: null
503
+ value: null
504
+ greater_is_better:
505
+ desc: null
506
+ value: null
507
+ ignore_data_skip:
508
+ desc: null
509
+ value: false
510
+ fsdp:
511
+ desc: null
512
+ value: []
513
+ fsdp_min_num_params:
514
+ desc: null
515
+ value: 0
516
+ fsdp_config:
517
+ desc: null
518
+ value:
519
+ min_num_params: 0
520
+ xla: false
521
+ xla_fsdp_v2: false
522
+ xla_fsdp_grad_ckpt: false
523
+ fsdp_transformer_layer_cls_to_wrap:
524
+ desc: null
525
+ value: null
526
+ accelerator_config:
527
+ desc: null
528
+ value:
529
+ split_batches: false
530
+ dispatch_batches: null
531
+ even_batches: true
532
+ use_seedable_sampler: true
533
+ gradient_accumulation_kwargs: null
534
+ deepspeed:
535
+ desc: null
536
+ value: null
537
+ label_smoothing_factor:
538
+ desc: null
539
+ value: 0.0
540
+ optim:
541
+ desc: null
542
+ value: adamw_torch
543
+ optim_args:
544
+ desc: null
545
+ value: null
546
+ adafactor:
547
+ desc: null
548
+ value: false
549
+ group_by_length:
550
+ desc: null
551
+ value: false
552
+ length_column_name:
553
+ desc: null
554
+ value: length
555
+ report_to:
556
+ desc: null
557
+ value:
558
+ - tensorboard
559
+ - wandb
560
+ ddp_find_unused_parameters:
561
+ desc: null
562
+ value: null
563
+ ddp_bucket_cap_mb:
564
+ desc: null
565
+ value: null
566
+ ddp_broadcast_buffers:
567
+ desc: null
568
+ value: null
569
+ dataloader_pin_memory:
570
+ desc: null
571
+ value: true
572
+ dataloader_persistent_workers:
573
+ desc: null
574
+ value: false
575
+ skip_memory_metrics:
576
+ desc: null
577
+ value: true
578
+ use_legacy_prediction_loop:
579
+ desc: null
580
+ value: false
581
+ push_to_hub:
582
+ desc: null
583
+ value: true
584
+ resume_from_checkpoint:
585
+ desc: null
586
+ value: null
587
+ hub_model_id:
588
+ desc: null
589
+ value: null
590
+ hub_strategy:
591
+ desc: null
592
+ value: every_save
593
+ hub_token:
594
+ desc: null
595
+ value: <HUB_TOKEN>
596
+ hub_private_repo:
597
+ desc: null
598
+ value: false
599
+ hub_always_push:
600
+ desc: null
601
+ value: false
602
+ gradient_checkpointing:
603
+ desc: null
604
+ value: true
605
+ gradient_checkpointing_kwargs:
606
+ desc: null
607
+ value:
608
+ use_reentrant: false
609
+ include_inputs_for_metrics:
610
+ desc: null
611
+ value: false
612
+ eval_do_concat_batches:
613
+ desc: null
614
+ value: true
615
+ fp16_backend:
616
+ desc: null
617
+ value: auto
618
+ push_to_hub_model_id:
619
+ desc: null
620
+ value: null
621
+ push_to_hub_organization:
622
+ desc: null
623
+ value: null
624
+ push_to_hub_token:
625
+ desc: null
626
+ value: <PUSH_TO_HUB_TOKEN>
627
+ mp_parameters:
628
+ desc: null
629
+ value: ''
630
+ auto_find_batch_size:
631
+ desc: null
632
+ value: false
633
+ full_determinism:
634
+ desc: null
635
+ value: false
636
+ torchdynamo:
637
+ desc: null
638
+ value: null
639
+ ray_scope:
640
+ desc: null
641
+ value: last
642
+ ddp_timeout:
643
+ desc: null
644
+ value: 7200
645
+ torch_compile:
646
+ desc: null
647
+ value: false
648
+ torch_compile_backend:
649
+ desc: null
650
+ value: null
651
+ torch_compile_mode:
652
+ desc: null
653
+ value: null
654
+ dispatch_batches:
655
+ desc: null
656
+ value: null
657
+ split_batches:
658
+ desc: null
659
+ value: null
660
+ include_tokens_per_second:
661
+ desc: null
662
+ value: false
663
+ include_num_input_tokens_seen:
664
+ desc: null
665
+ value: false
666
+ neftune_noise_alpha:
667
+ desc: null
668
+ value: null
669
+ optim_target_modules:
670
+ desc: null
671
+ value: null
672
+ max_seq_length:
673
+ desc: null
674
+ value: 2048
wandb/run-20240425_134518-etajcxpg/files/output.log ADDED
@@ -0,0 +1,4772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ 0%| | 1/20000 [00:03<18:31:57, 3.34s/it]
4
+
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+ 0%| | 24/20000 [00:48<10:42:13, 1.93s/it]
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+ 0%| | 49/20000 [01:36<10:34:59, 1.91s/it]
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+ 0%| | 75/20000 [02:25<10:27:08, 1.89s/it]
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+ 0%| | 100/20000 [03:12<10:24:29, 1.88s/it]
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+ 1%| | 125/20000 [03:59<10:15:18, 1.86s/it]
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+ 1%| | 149/20000 [04:44<10:14:29, 1.86s/it]
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+ 1%| | 174/20000 [05:30<10:09:32, 1.84s/it]
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+ 1%| | 199/20000 [06:16<10:08:46, 1.84s/it]
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+ 1%| | 224/20000 [07:02<10:07:40, 1.84s/it]
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+ 1%| | 249/20000 [07:48<10:01:21, 1.83s/it]
247
+
248
+
249
+
250
+
251
+
252
+
253
+
254
+
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+
263
+
264
+
265
+
266
+
267
+
268
+
269
+
270
+ 1%|▏ | 274/20000 [08:34<10:06:27, 1.84s/it]
271
+
272
+
273
+
274
+
275
+
276
+
277
+
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+ 1%|▏ | 299/20000 [09:20<10:02:36, 1.84s/it]
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
+
308
+
309
+
310
+
311
+
312
+
313
+
314
+
315
+
316
+
317
+
318
+ 2%|▏ | 325/20000 [10:08<10:04:40, 1.84s/it]
319
+
320
+
321
+
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+
339
+
340
+
341
+
342
+ 2%|▏ | 350/20000 [10:53<9:55:32, 1.82s/it]
343
+
344
+
345
+
346
+
347
+
348
+
349
+
350
+
351
+
352
+
353
+
354
+
355
+
356
+
357
+
358
+
359
+
360
+
361
+
362
+
363
+
364
+
365
+
366
+ 2%|▏ | 375/20000 [11:39<9:55:48, 1.82s/it]
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+
380
+
381
+
382
+
383
+
384
+
385
+
386
+
387
+
388
+
389
+ 2%|▏ | 399/20000 [12:22<9:54:28, 1.82s/it]
390
+
391
+
392
+
393
+
394
+
395
+
396
+
397
+
398
+
399
+
400
+
401
+
402
+
403
+
404
+
405
+
406
+
407
+
408
+
409
+
410
+
411
+
412
+
413
+ 2%|▏ | 425/20000 [13:10<9:53:05, 1.82s/it]
414
+
415
+
416
+
417
+
418
+
419
+
420
+
421
+
422
+
423
+
424
+
425
+
426
+
427
+
428
+
429
+
430
+
431
+
432
+
433
+
434
+
435
+
436
+
437
+ 2%|▏ | 450/20000 [13:55<9:52:52, 1.82s/it]
438
+
439
+
440
+
441
+
442
+
443
+
444
+
445
+
446
+
447
+
448
+
449
+
450
+
451
+
452
+
453
+
454
+
455
+
456
+
457
+
458
+
459
+
460
+
461
+ 2%|▏ | 475/20000 [14:41<9:51:46, 1.82s/it]
462
+
463
+
464
+
465
+
466
+
467
+
468
+
469
+
470
+
471
+
472
+
473
+
474
+
475
+
476
+
477
+
478
+
479
+
480
+
481
+
482
+
483
+
484
+ 2%|▏ | 499/20000 [15:24<9:55:43, 1.83s/it]
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+
493
+
494
+
495
+
496
+
497
+
498
+
499
+
500
+
501
+
502
+
503
+
504
+
505
+
506
+
507
+
508
+ 3%|▎ | 525/20000 [16:12<9:54:19, 1.83s/it]
509
+
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+
520
+
521
+
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+ 3%|▎ | 550/20000 [16:57<9:51:40, 1.83s/it]
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+
546
+
547
+
548
+
549
+
550
+
551
+
552
+
553
+
554
+
555
+
556
+ 3%|▎ | 575/20000 [17:43<9:49:15, 1.82s/it]
557
+
558
+
559
+
560
+
561
+
562
+
563
+
564
+
565
+
566
+
567
+
568
+
569
+
570
+
571
+
572
+
573
+
574
+
575
+
576
+
577
+
578
+
579
+ 3%|▎ | 599/20000 [18:27<9:50:40, 1.83s/it]
580
+
581
+
582
+
583
+
584
+
585
+
586
+
587
+
588
+
589
+
590
+
591
+
592
+
593
+
594
+
595
+
596
+
597
+
598
+
599
+
600
+
601
+
602
+
603
+ 3%|▎ | 624/20000 [19:12<9:51:17, 1.83s/it]
604
+
605
+
606
+
607
+
608
+
609
+
610
+
611
+
612
+
613
+
614
+
615
+
616
+
617
+
618
+
619
+
620
+
621
+
622
+
623
+
624
+
625
+
626
+
627
+ 3%|▎ | 650/20000 [20:00<9:52:05, 1.84s/it]
628
+
629
+
630
+
631
+
632
+
633
+
634
+
635
+
636
+
637
+
638
+
639
+
640
+
641
+
642
+
643
+
644
+
645
+
646
+
647
+
648
+
649
+
650
+
651
+ 3%|▎ | 675/20000 [20:46<9:46:02, 1.82s/it]
652
+
653
+
654
+
655
+
656
+
657
+
658
+
659
+
660
+
661
+
662
+
663
+
664
+
665
+
666
+
667
+
668
+
669
+
670
+
671
+
672
+
673
+
674
+
675
+ 4%|▎ | 700/20000 [21:31<9:46:33, 1.82s/it]
676
+
677
+
678
+
679
+
680
+
681
+
682
+
683
+
684
+
685
+
686
+
687
+
688
+
689
+
690
+
691
+
692
+
693
+
694
+
695
+
696
+
697
+
698
+
699
+ 4%|▎ | 725/20000 [22:17<9:44:06, 1.82s/it]
700
+
701
+
702
+
703
+
704
+
705
+
706
+
707
+
708
+
709
+
710
+
711
+
712
+
713
+
714
+
715
+
716
+
717
+
718
+
719
+
720
+
721
+
722
+ 4%|▎ | 749/20000 [23:00<9:42:34, 1.82s/it]
723
+
724
+
725
+
726
+
727
+
728
+
729
+
730
+
731
+
732
+
733
+
734
+
735
+
736
+
737
+
738
+
739
+
740
+
741
+
742
+
743
+
744
+
745
+
746
+ 4%|▍ | 775/20000 [23:47<9:39:30, 1.81s/it]
747
+
748
+
749
+
750
+
751
+
752
+
753
+
754
+
755
+
756
+
757
+
758
+
759
+
760
+
761
+
762
+
763
+
764
+
765
+
766
+
767
+
768
+
769
+
770
+ 4%|▍ | 800/20000 [24:33<9:38:43, 1.81s/it]
771
+
772
+
773
+
774
+
775
+
776
+
777
+
778
+
779
+
780
+
781
+
782
+
783
+
784
+
785
+
786
+
787
+
788
+
789
+
790
+
791
+
792
+
793
+ 4%|▍ | 825/20000 [25:18<9:36:49, 1.80s/it]
794
+
795
+
796
+
797
+
798
+
799
+
800
+
801
+
802
+
803
+
804
+
805
+
806
+
807
+
808
+
809
+
810
+
811
+
812
+
813
+
814
+
815
+
816
+
817
+ 4%|▍ | 850/20000 [26:03<9:37:19, 1.81s/it]
818
+
819
+
820
+
821
+
822
+
823
+
824
+
825
+
826
+
827
+
828
+
829
+
830
+
831
+
832
+
833
+
834
+
835
+
836
+
837
+
838
+
839
+
840
+ 4%|▍ | 874/20000 [26:47<9:35:28, 1.81s/it]
841
+
842
+
843
+
844
+
845
+
846
+
847
+
848
+
849
+
850
+
851
+
852
+
853
+
854
+
855
+
856
+
857
+
858
+
859
+
860
+
861
+
862
+
863
+
864
+ 4%|▍ | 900/20000 [27:34<9:37:13, 1.81s/it]
865
+
866
+
867
+
868
+
869
+
870
+
871
+
872
+
873
+
874
+
875
+
876
+
877
+
878
+
879
+
880
+
881
+
882
+
883
+
884
+
885
+
886
+
887
+
888
+ 5%|▍ | 925/20000 [28:19<9:34:17, 1.81s/it]
889
+
890
+
891
+
892
+
893
+
894
+
895
+
896
+
897
+
898
+
899
+
900
+
901
+
902
+
903
+
904
+
905
+
906
+
907
+
908
+
909
+
910
+
911
+ 5%|▍ | 949/20000 [29:02<9:33:34, 1.81s/it]
912
+
913
+
914
+
915
+
916
+
917
+
918
+
919
+
920
+
921
+
922
+
923
+
924
+
925
+
926
+
927
+
928
+
929
+
930
+
931
+
932
+
933
+
934
+
935
+ 5%|▍ | 975/20000 [29:49<9:30:45, 1.80s/it]
936
+
937
+
938
+
939
+
940
+
941
+
942
+
943
+
944
+
945
+
946
+
947
+
948
+
949
+
950
+
951
+
952
+
953
+
954
+
955
+
956
+
957
+
958
+ 5%|▍ | 999/20000 [30:33<9:36:05, 1.82s/it]
959
+
960
+
961
+
962
+
963
+
964
+
965
+
966
+
967
+
968
+
969
+
970
+
971
+
972
+
973
+
974
+
975
+
976
+
977
+
978
+
979
+
980
+
981
+
982
+ 5%|▌ | 1025/20000 [31:20<9:36:05, 1.82s/it]
983
+
984
+
985
+
986
+
987
+
988
+
989
+
990
+
991
+
992
+
993
+
994
+
995
+
996
+
997
+
998
+
999
+
1000
+
1001
+
1002
+
1003
+
1004
+
1005
+
1006
+ 5%|▌ | 1050/20000 [32:05<9:33:00, 1.81s/it]
1007
+
1008
+
1009
+
1010
+
1011
+
1012
+
1013
+
1014
+
1015
+
1016
+
1017
+
1018
+
1019
+
1020
+
1021
+
1022
+
1023
+
1024
+
1025
+
1026
+
1027
+
1028
+
1029
+ 5%|▌ | 1074/20000 [32:49<9:32:40, 1.82s/it]
1030
+
1031
+
1032
+
1033
+
1034
+
1035
+
1036
+
1037
+
1038
+
1039
+
1040
+
1041
+
1042
+
1043
+
1044
+
1045
+
1046
+
1047
+
1048
+
1049
+
1050
+
1051
+
1052
+
1053
+ 6%|▌ | 1100/20000 [33:36<9:29:59, 1.81s/it]
1054
+
1055
+
1056
+
1057
+
1058
+
1059
+
1060
+
1061
+
1062
+
1063
+
1064
+
1065
+
1066
+
1067
+
1068
+
1069
+
1070
+
1071
+
1072
+
1073
+
1074
+
1075
+
1076
+
1077
+ 6%|▌ | 1125/20000 [34:21<9:29:27, 1.81s/it]
1078
+
1079
+
1080
+
1081
+
1082
+
1083
+
1084
+
1085
+
1086
+
1087
+
1088
+
1089
+
1090
+
1091
+
1092
+
1093
+
1094
+
1095
+
1096
+
1097
+
1098
+
1099
+
1100
+ 6%|▌ | 1149/20000 [35:05<9:30:07, 1.81s/it]
1101
+
1102
+
1103
+
1104
+
1105
+
1106
+
1107
+
1108
+
1109
+
1110
+
1111
+
1112
+
1113
+
1114
+
1115
+
1116
+
1117
+
1118
+
1119
+
1120
+
1121
+
1122
+
1123
+
1124
+ 6%|▌ | 1175/20000 [35:52<9:27:54, 1.81s/it]
1125
+
1126
+
1127
+
1128
+
1129
+
1130
+
1131
+
1132
+
1133
+
1134
+
1135
+
1136
+
1137
+
1138
+
1139
+
1140
+
1141
+
1142
+
1143
+
1144
+
1145
+
1146
+
1147
+
1148
+ 6%|▌ | 1200/20000 [36:37<9:26:57, 1.81s/it]
1149
+
1150
+
1151
+
1152
+
1153
+
1154
+
1155
+
1156
+
1157
+
1158
+
1159
+
1160
+
1161
+
1162
+
1163
+
1164
+
1165
+
1166
+
1167
+
1168
+
1169
+
1170
+
1171
+ 6%|▌ | 1225/20000 [37:22<9:26:26, 1.81s/it]
1172
+
1173
+
1174
+
1175
+
1176
+
1177
+
1178
+
1179
+
1180
+
1181
+
1182
+
1183
+
1184
+
1185
+
1186
+
1187
+
1188
+
1189
+
1190
+
1191
+
1192
+
1193
+
1194
+
1195
+ 6%|▋ | 1250/20000 [38:07<9:24:36, 1.81s/it]
1196
+
1197
+
1198
+
1199
+
1200
+
1201
+
1202
+
1203
+
1204
+
1205
+
1206
+
1207
+
1208
+
1209
+
1210
+
1211
+
1212
+
1213
+
1214
+
1215
+
1216
+
1217
+
1218
+ 6%|▋ | 1274/20000 [38:50<9:26:14, 1.81s/it]
1219
+
1220
+
1221
+
1222
+
1223
+
1224
+
1225
+
1226
+
1227
+
1228
+
1229
+
1230
+
1231
+
1232
+
1233
+
1234
+
1235
+
1236
+
1237
+
1238
+
1239
+
1240
+
1241
+
1242
+ 6%|▋ | 1300/20000 [39:38<9:27:53, 1.82s/it]
1243
+
1244
+
1245
+
1246
+
1247
+
1248
+
1249
+
1250
+
1251
+
1252
+
1253
+
1254
+
1255
+
1256
+
1257
+
1258
+
1259
+
1260
+
1261
+
1262
+
1263
+
1264
+
1265
+
1266
+ 7%|▋ | 1325/20000 [40:23<9:27:40, 1.82s/it]
1267
+
1268
+
1269
+
1270
+
1271
+
1272
+
1273
+
1274
+
1275
+
1276
+
1277
+
1278
+
1279
+
1280
+
1281
+
1282
+
1283
+
1284
+
1285
+
1286
+
1287
+
1288
+
1289
+ 7%|▋ | 1350/20000 [41:08<9:23:50, 1.81s/it]
1290
+
1291
+
1292
+
1293
+
1294
+
1295
+
1296
+
1297
+
1298
+
1299
+
1300
+
1301
+
1302
+
1303
+
1304
+
1305
+
1306
+
1307
+
1308
+
1309
+
1310
+
1311
+
1312
+
1313
+ 7%|▋ | 1375/20000 [41:53<9:19:57, 1.80s/it]
1314
+
1315
+
1316
+
1317
+
1318
+
1319
+
1320
+
1321
+
1322
+
1323
+
1324
+
1325
+
1326
+
1327
+
1328
+
1329
+
1330
+
1331
+
1332
+
1333
+
1334
+
1335
+
1336
+ 7%|▋ | 1399/20000 [42:37<9:18:21, 1.80s/it]
1337
+
1338
+
1339
+
1340
+
1341
+
1342
+
1343
+
1344
+
1345
+
1346
+
1347
+
1348
+
1349
+
1350
+
1351
+
1352
+
1353
+
1354
+
1355
+
1356
+
1357
+
1358
+
1359
+
1360
+ 7%|▋ | 1425/20000 [43:24<9:19:30, 1.81s/it]
1361
+
1362
+
1363
+
1364
+
1365
+
1366
+
1367
+
1368
+
1369
+
1370
+
1371
+
1372
+
1373
+
1374
+
1375
+
1376
+
1377
+
1378
+
1379
+
1380
+
1381
+
1382
+
1383
+
1384
+ 7%|▋ | 1450/20000 [44:09<9:18:24, 1.81s/it]
1385
+
1386
+
1387
+
1388
+
1389
+
1390
+
1391
+
1392
+
1393
+
1394
+
1395
+
1396
+
1397
+
1398
+
1399
+
1400
+
1401
+
1402
+
1403
+
1404
+
1405
+
1406
+
1407
+ 7%|▋ | 1475/20000 [44:54<9:16:54, 1.80s/it]
1408
+
1409
+
1410
+
1411
+
1412
+
1413
+
1414
+
1415
+
1416
+
1417
+
1418
+
1419
+
1420
+
1421
+
1422
+
1423
+
1424
+
1425
+
1426
+
1427
+
1428
+
1429
+
1430
+
1431
+ 8%|▊ | 1500/20000 [45:39<9:17:19, 1.81s/it]
1432
+
1433
+
1434
+
1435
+
1436
+
1437
+
1438
+
1439
+
1440
+
1441
+
1442
+
1443
+
1444
+
1445
+
1446
+
1447
+
1448
+
1449
+
1450
+
1451
+
1452
+
1453
+
1454
+ 8%|▊ | 1525/20000 [46:24<9:16:08, 1.81s/it]
1455
+
1456
+
1457
+
1458
+
1459
+
1460
+
1461
+
1462
+
1463
+
1464
+
1465
+
1466
+
1467
+
1468
+
1469
+
1470
+
1471
+
1472
+
1473
+
1474
+
1475
+
1476
+
1477
+
1478
+ 8%|▊ | 1550/20000 [47:09<9:14:14, 1.80s/it]
1479
+
1480
+
1481
+
1482
+
1483
+
1484
+
1485
+
1486
+
1487
+
1488
+
1489
+
1490
+
1491
+
1492
+
1493
+
1494
+
1495
+
1496
+
1497
+
1498
+
1499
+
1500
+
1501
+ 8%|▊ | 1575/20000 [47:54<9:13:14, 1.80s/it]
1502
+
1503
+
1504
+
1505
+
1506
+
1507
+
1508
+
1509
+
1510
+
1511
+
1512
+
1513
+
1514
+
1515
+
1516
+
1517
+
1518
+
1519
+
1520
+
1521
+
1522
+
1523
+
1524
+
1525
+ 8%|▊ | 1600/20000 [48:40<9:15:37, 1.81s/it]
1526
+
1527
+
1528
+
1529
+
1530
+
1531
+
1532
+
1533
+
1534
+
1535
+
1536
+
1537
+
1538
+
1539
+
1540
+
1541
+
1542
+
1543
+
1544
+
1545
+
1546
+
1547
+
1548
+ 8%|▊ | 1624/20000 [49:23<9:16:06, 1.82s/it]
1549
+
1550
+
1551
+
1552
+
1553
+
1554
+
1555
+
1556
+
1557
+
1558
+
1559
+
1560
+
1561
+
1562
+
1563
+
1564
+
1565
+
1566
+
1567
+
1568
+
1569
+
1570
+
1571
+
1572
+ 8%|▊ | 1650/20000 [50:10<9:16:47, 1.82s/it]
1573
+
1574
+
1575
+
1576
+
1577
+
1578
+
1579
+
1580
+
1581
+
1582
+
1583
+
1584
+
1585
+
1586
+
1587
+
1588
+
1589
+
1590
+
1591
+
1592
+
1593
+
1594
+
1595
+
1596
+ 8%|▊ | 1675/20000 [50:56<9:13:23, 1.81s/it]
1597
+
1598
+
1599
+
1600
+
1601
+
1602
+
1603
+
1604
+
1605
+
1606
+
1607
+
1608
+
1609
+
1610
+
1611
+
1612
+
1613
+
1614
+
1615
+
1616
+
1617
+
1618
+
1619
+ 8%|▊ | 1699/20000 [51:39<9:12:58, 1.81s/it]
1620
+
1621
+
1622
+
1623
+
1624
+
1625
+
1626
+
1627
+
1628
+
1629
+
1630
+
1631
+
1632
+
1633
+
1634
+
1635
+
1636
+
1637
+
1638
+
1639
+
1640
+
1641
+
1642
+
1643
+ 9%|▊ | 1725/20000 [52:26<9:11:49, 1.81s/it]
1644
+
1645
+
1646
+
1647
+
1648
+
1649
+
1650
+
1651
+
1652
+
1653
+
1654
+
1655
+
1656
+
1657
+
1658
+
1659
+
1660
+
1661
+
1662
+
1663
+
1664
+
1665
+
1666
+
1667
+ 9%|▉ | 1750/20000 [53:11<9:13:13, 1.82s/it]
1668
+
1669
+
1670
+
1671
+
1672
+
1673
+
1674
+
1675
+
1676
+
1677
+
1678
+
1679
+
1680
+
1681
+
1682
+
1683
+
1684
+
1685
+
1686
+
1687
+
1688
+
1689
+
1690
+ 9%|▉ | 1774/20000 [53:55<9:10:04, 1.81s/it]
1691
+
1692
+
1693
+
1694
+
1695
+
1696
+
1697
+
1698
+
1699
+
1700
+
1701
+
1702
+
1703
+
1704
+
1705
+
1706
+
1707
+
1708
+
1709
+
1710
+
1711
+
1712
+
1713
+
1714
+ 9%|▉ | 1800/20000 [54:42<9:09:42, 1.81s/it]
1715
+
1716
+
1717
+
1718
+
1719
+
1720
+
1721
+
1722
+
1723
+
1724
+
1725
+
1726
+
1727
+
1728
+
1729
+
1730
+
1731
+
1732
+
1733
+
1734
+
1735
+
1736
+
1737
+
1738
+ 9%|▉ | 1825/20000 [55:27<9:09:36, 1.81s/it]
1739
+
1740
+
1741
+
1742
+
1743
+
1744
+
1745
+
1746
+
1747
+
1748
+
1749
+
1750
+
1751
+
1752
+
1753
+
1754
+
1755
+
1756
+
1757
+
1758
+
1759
+
1760
+
1761
+ 9%|▉ | 1849/20000 [56:11<9:11:01, 1.82s/it]
1762
+
1763
+
1764
+
1765
+
1766
+
1767
+
1768
+
1769
+
1770
+
1771
+
1772
+
1773
+
1774
+
1775
+
1776
+
1777
+
1778
+
1779
+
1780
+
1781
+
1782
+
1783
+
1784
+
1785
+ 9%|▉ | 1875/20000 [56:58<9:09:44, 1.82s/it]
1786
+
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+
1798
+
1799
+
1800
+
1801
+
1802
+
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+ 10%|▉ | 1900/20000 [57:44<9:07:10, 1.81s/it]
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1821
+
1822
+
1823
+
1824
+
1825
+
1826
+
1827
+
1828
+
1829
+
1830
+
1831
+
1832
+
1833
+ 10%|▉ | 1925/20000 [58:29<9:06:27, 1.81s/it]
1834
+
1835
+
1836
+
1837
+
1838
+
1839
+
1840
+
1841
+
1842
+
1843
+
1844
+
1845
+
1846
+
1847
+
1848
+
1849
+
1850
+
1851
+
1852
+
1853
+
1854
+
1855
+
1856
+ 10%|▉ | 1950/20000 [59:14<9:04:25, 1.81s/it]
1857
+
1858
+
1859
+
1860
+
1861
+
1862
+
1863
+
1864
+
1865
+
1866
+
1867
+
1868
+
1869
+
1870
+
1871
+
1872
+
1873
+
1874
+
1875
+
1876
+
1877
+
1878
+
1879
+
1880
+ 10%|▉ | 1975/20000 [1:00:00<9:06:51, 1.82s/it]
1881
+
1882
+
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
+
1892
+
1893
+
1894
+
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+
1902
+
1903
+
1904
+ 10%|█ | 2000/20000 [1:00:46<9:04:32, 1.82s/it]
1905
+
1906
+
1907
+
1908
+
1909
+
1910
+
1911
+
1912
+
1913
+
1914
+
1915
+
1916
+
1917
+
1918
+
1919
+
1920
+
1921
+
1922
+
1923
+
1924
+
1925
+
1926
+
1927
+ 10%|█ | 2024/20000 [1:01:29<9:06:34, 1.82s/it]
1928
+
1929
+
1930
+
1931
+
1932
+
1933
+
1934
+
1935
+
1936
+
1937
+
1938
+
1939
+
1940
+
1941
+
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+ 10%|█ | 2050/20000 [1:02:16<9:01:46, 1.81s/it]
1952
+
1953
+
1954
+
1955
+
1956
+
1957
+
1958
+
1959
+
1960
+
1961
+
1962
+
1963
+
1964
+
1965
+
1966
+
1967
+
1968
+
1969
+
1970
+
1971
+
1972
+
1973
+
1974
+
1975
+ 10%|█ | 2075/20000 [1:03:01<9:01:26, 1.81s/it]
1976
+
1977
+
1978
+
1979
+
1980
+
1981
+
1982
+
1983
+
1984
+
1985
+
1986
+
1987
+
1988
+
1989
+
1990
+
1991
+
1992
+
1993
+
1994
+
1995
+
1996
+
1997
+
1998
+ 10%|█ | 2099/20000 [1:03:45<9:00:23, 1.81s/it]
1999
+
2000
+
2001
+
2002
+
2003
+
2004
+
2005
+
2006
+
2007
+
2008
+
2009
+
2010
+
2011
+
2012
+
2013
+
2014
+
2015
+
2016
+
2017
+
2018
+
2019
+
2020
+
2021
+
2022
+ 11%|█ | 2125/20000 [1:04:32<9:00:23, 1.81s/it]
2023
+
2024
+
2025
+
2026
+
2027
+
2028
+
2029
+
2030
+
2031
+
2032
+
2033
+
2034
+
2035
+
2036
+
2037
+
2038
+
2039
+
2040
+
2041
+
2042
+
2043
+
2044
+
2045
+
2046
+ 11%|█ | 2150/20000 [1:05:17<8:58:17, 1.81s/it]
2047
+
2048
+
2049
+
2050
+
2051
+
2052
+
2053
+
2054
+
2055
+
2056
+
2057
+
2058
+
2059
+
2060
+
2061
+
2062
+
2063
+
2064
+
2065
+
2066
+
2067
+
2068
+
2069
+ 11%|█ | 2175/20000 [1:06:03<8:58:27, 1.81s/it]
2070
+
2071
+
2072
+
2073
+
2074
+
2075
+
2076
+
2077
+
2078
+
2079
+
2080
+
2081
+
2082
+
2083
+
2084
+
2085
+
2086
+
2087
+
2088
+
2089
+
2090
+
2091
+
2092
+
2093
+ 11%|█ | 2200/20000 [1:06:48<8:56:43, 1.81s/it]
2094
+
2095
+
2096
+
2097
+
2098
+
2099
+
2100
+
2101
+
2102
+
2103
+
2104
+
2105
+
2106
+
2107
+
2108
+
2109
+
2110
+
2111
+
2112
+
2113
+
2114
+
2115
+
2116
+
2117
+ 11%|█ | 2225/20000 [1:07:33<8:57:12, 1.81s/it]
2118
+
2119
+
2120
+
2121
+
2122
+
2123
+
2124
+
2125
+
2126
+
2127
+
2128
+
2129
+
2130
+
2131
+
2132
+
2133
+
2134
+
2135
+
2136
+
2137
+
2138
+
2139
+
2140
+ 11%|█▏ | 2250/20000 [1:08:18<8:56:31, 1.81s/it]
2141
+
2142
+
2143
+
2144
+
2145
+
2146
+
2147
+
2148
+
2149
+
2150
+
2151
+
2152
+
2153
+
2154
+
2155
+
2156
+
2157
+
2158
+
2159
+
2160
+
2161
+
2162
+
2163
+
2164
+ 11%|█▏ | 2275/20000 [1:09:04<8:58:46, 1.82s/it]
2165
+
2166
+
2167
+
2168
+
2169
+
2170
+
2171
+
2172
+
2173
+
2174
+
2175
+
2176
+
2177
+
2178
+
2179
+
2180
+
2181
+
2182
+
2183
+
2184
+
2185
+
2186
+
2187
+
2188
+ 12%|█▏ | 2300/20000 [1:09:50<9:03:03, 1.84s/it]
2189
+
2190
+
2191
+
2192
+
2193
+
2194
+
2195
+
2196
+
2197
+
2198
+
2199
+
2200
+
2201
+
2202
+
2203
+
2204
+
2205
+
2206
+
2207
+
2208
+
2209
+
2210
+
2211
+
2212
+ 12%|█▏ | 2325/20000 [1:10:35<8:58:08, 1.83s/it]
2213
+
2214
+
2215
+
2216
+
2217
+
2218
+
2219
+
2220
+
2221
+
2222
+
2223
+
2224
+
2225
+
2226
+
2227
+
2228
+
2229
+
2230
+
2231
+
2232
+
2233
+
2234
+
2235
+ 12%|█▏ | 2350/20000 [1:11:21<8:52:13, 1.81s/it]
2236
+
2237
+
2238
+
2239
+
2240
+
2241
+
2242
+
2243
+
2244
+
2245
+
2246
+
2247
+
2248
+
2249
+
2250
+
2251
+
2252
+
2253
+
2254
+
2255
+
2256
+
2257
+
2258
+
2259
+ 12%|█▏ | 2375/20000 [1:12:06<8:52:21, 1.81s/it]
2260
+
2261
+
2262
+
2263
+
2264
+
2265
+
2266
+
2267
+
2268
+
2269
+
2270
+
2271
+
2272
+
2273
+
2274
+
2275
+
2276
+
2277
+
2278
+
2279
+
2280
+
2281
+
2282
+
2283
+ 12%|█▏ | 2400/20000 [1:12:51<8:52:32, 1.82s/it]
2284
+
2285
+
2286
+
2287
+
2288
+
2289
+
2290
+
2291
+
2292
+
2293
+
2294
+
2295
+
2296
+
2297
+
2298
+
2299
+
2300
+
2301
+
2302
+
2303
+
2304
+
2305
+
2306
+ 12%|█▏ | 2425/20000 [1:13:36<8:47:50, 1.80s/it]
2307
+
2308
+
2309
+
2310
+
2311
+
2312
+
2313
+
2314
+
2315
+
2316
+
2317
+
2318
+
2319
+
2320
+
2321
+
2322
+
2323
+
2324
+
2325
+
2326
+
2327
+
2328
+
2329
+
2330
+ 12%|█▏ | 2450/20000 [1:14:21<8:47:48, 1.80s/it]
2331
+
2332
+
2333
+
2334
+
2335
+
2336
+
2337
+
2338
+
2339
+
2340
+
2341
+
2342
+
2343
+
2344
+
2345
+
2346
+
2347
+
2348
+
2349
+
2350
+
2351
+
2352
+
2353
+ 12%|█▏ | 2475/20000 [1:15:06<8:48:29, 1.81s/it]
2354
+
2355
+
2356
+
2357
+
2358
+
2359
+
2360
+
2361
+
2362
+
2363
+
2364
+
2365
+
2366
+
2367
+
2368
+
2369
+
2370
+
2371
+
2372
+
2373
+
2374
+
2375
+
2376
+
2377
+ 12%|█▎ | 2500/20000 [1:15:52<8:46:28, 1.81s/it]
2378
+
2379
+
2380
+
2381
+
2382
+
2383
+
2384
+
2385
+
2386
+
2387
+
2388
+
2389
+
2390
+
2391
+
2392
+
2393
+
2394
+
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+ 13%|█▎ | 2525/20000 [1:16:37<8:44:37, 1.80s/it]
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+ 13%|█▎ | 2550/20000 [1:17:22<8:44:18, 1.80s/it]
2425
+
2426
+
2427
+
2428
+
2429
+
2430
+
2431
+
2432
+
2433
+
2434
+
2435
+
2436
+
2437
+
2438
+
2439
+
2440
+
2441
+
2442
+
2443
+
2444
+
2445
+
2446
+
2447
+ 13%|█▎ | 2574/20000 [1:18:05<8:47:07, 1.81s/it]
2448
+
2449
+
2450
+
2451
+
2452
+
2453
+
2454
+
2455
+
2456
+
2457
+
2458
+
2459
+
2460
+
2461
+
2462
+
2463
+
2464
+
2465
+
2466
+
2467
+
2468
+
2469
+
2470
+
2471
+ 13%|█▎ | 2600/20000 [1:18:52<8:43:49, 1.81s/it]
2472
+
2473
+
2474
+
2475
+
2476
+
2477
+
2478
+
2479
+
2480
+
2481
+
2482
+
2483
+
2484
+
2485
+
2486
+
2487
+
2488
+
2489
+
2490
+
2491
+
2492
+
2493
+
2494
+
2495
+ 13%|█▎ | 2625/20000 [1:19:38<8:43:27, 1.81s/it]
2496
+
2497
+
2498
+
2499
+
2500
+
2501
+
2502
+
2503
+
2504
+
2505
+
2506
+
2507
+
2508
+
2509
+
2510
+
2511
+
2512
+
2513
+
2514
+
2515
+
2516
+
2517
+
2518
+ 13%|█▎ | 2650/20000 [1:20:23<8:41:59, 1.81s/it]
2519
+
2520
+
2521
+
2522
+
2523
+
2524
+
2525
+
2526
+
2527
+
2528
+
2529
+
2530
+
2531
+
2532
+
2533
+
2534
+
2535
+
2536
+
2537
+
2538
+
2539
+
2540
+
2541
+
2542
+ 13%|█▎ | 2675/20000 [1:21:08<8:42:45, 1.81s/it]
2543
+
2544
+
2545
+
2546
+
2547
+
2548
+
2549
+
2550
+
2551
+
2552
+
2553
+
2554
+
2555
+
2556
+
2557
+
2558
+
2559
+
2560
+
2561
+
2562
+
2563
+
2564
+
2565
+
2566
+ 14%|█▎ | 2700/20000 [1:21:53<8:42:27, 1.81s/it]
2567
+
2568
+
2569
+
2570
+
2571
+
2572
+
2573
+
2574
+
2575
+
2576
+
2577
+
2578
+
2579
+
2580
+
2581
+
2582
+
2583
+
2584
+
2585
+
2586
+
2587
+
2588
+
2589
+ 14%|█▎ | 2725/20000 [1:22:39<8:53:07, 1.85s/it]
2590
+
2591
+
2592
+
2593
+
2594
+
2595
+
2596
+
2597
+
2598
+
2599
+
2600
+
2601
+
2602
+
2603
+
2604
+
2605
+
2606
+
2607
+
2608
+
2609
+
2610
+
2611
+
2612
+
2613
+ 14%|█▍ | 2750/20000 [1:23:24<8:41:36, 1.81s/it]
2614
+
2615
+
2616
+
2617
+
2618
+
2619
+
2620
+
2621
+
2622
+
2623
+
2624
+
2625
+
2626
+
2627
+
2628
+
2629
+
2630
+
2631
+
2632
+
2633
+
2634
+
2635
+
2636
+
2637
+ 14%|█▍ | 2775/20000 [1:24:10<8:43:00, 1.82s/it]
2638
+
2639
+
2640
+
2641
+
2642
+
2643
+
2644
+
2645
+
2646
+
2647
+
2648
+
2649
+
2650
+
2651
+
2652
+
2653
+
2654
+
2655
+
2656
+
2657
+
2658
+
2659
+
2660
+
2661
+ 14%|█▍ | 2800/20000 [1:24:55<8:39:24, 1.81s/it]
2662
+
2663
+
2664
+
2665
+
2666
+
2667
+
2668
+
2669
+
2670
+
2671
+
2672
+
2673
+
2674
+
2675
+
2676
+
2677
+
2678
+
2679
+
2680
+
2681
+
2682
+
2683
+
2684
+ 14%|█▍ | 2825/20000 [1:25:40<8:38:34, 1.81s/it]
2685
+
2686
+
2687
+
2688
+
2689
+
2690
+
2691
+
2692
+
2693
+
2694
+
2695
+
2696
+
2697
+
2698
+
2699
+
2700
+
2701
+
2702
+
2703
+
2704
+
2705
+
2706
+
2707
+
2708
+ 14%|█▍ | 2850/20000 [1:26:26<8:36:46, 1.81s/it]
2709
+
2710
+
2711
+
2712
+
2713
+
2714
+
2715
+
2716
+
2717
+
2718
+
2719
+
2720
+
2721
+
2722
+
2723
+
2724
+
2725
+
2726
+
2727
+
2728
+
2729
+
2730
+
2731
+ 14%|█▍ | 2875/20000 [1:27:11<8:35:48, 1.81s/it]
2732
+
2733
+
2734
+
2735
+
2736
+
2737
+
2738
+
2739
+
2740
+
2741
+
2742
+
2743
+
2744
+
2745
+
2746
+
2747
+
2748
+
2749
+
2750
+
2751
+
2752
+
2753
+
2754
+
2755
+ 14%|█▍ | 2900/20000 [1:27:56<8:35:55, 1.81s/it]
2756
+
2757
+
2758
+
2759
+
2760
+
2761
+
2762
+
2763
+
2764
+
2765
+
2766
+
2767
+
2768
+
2769
+
2770
+
2771
+
2772
+
2773
+
2774
+
2775
+
2776
+
2777
+
2778
+ 15%|█▍ | 2924/20000 [1:28:39<8:32:38, 1.80s/it]
2779
+
2780
+
2781
+
2782
+
2783
+
2784
+
2785
+
2786
+
2787
+
2788
+
2789
+
2790
+
2791
+
2792
+
2793
+
2794
+
2795
+
2796
+
2797
+
2798
+
2799
+
2800
+
2801
+
2802
+ 15%|█▍ | 2950/20000 [1:29:26<8:31:07, 1.80s/it]
2803
+
2804
+
2805
+
2806
+
2807
+
2808
+
2809
+
2810
+
2811
+
2812
+
2813
+
2814
+
2815
+
2816
+
2817
+
2818
+
2819
+
2820
+
2821
+
2822
+
2823
+
2824
+
2825
+ 15%|█▍ | 2974/20000 [1:30:09<8:30:59, 1.80s/it]
2826
+
2827
+
2828
+
2829
+
2830
+
2831
+
2832
+
2833
+
2834
+
2835
+
2836
+
2837
+
2838
+
2839
+
2840
+
2841
+
2842
+
2843
+
2844
+
2845
+
2846
+
2847
+
2848
+
2849
+ 15%|█▌ | 3000/20000 [1:30:56<8:30:46, 1.80s/it]
2850
+
2851
+
2852
+
2853
+
2854
+
2855
+
2856
+
2857
+
2858
+
2859
+
2860
+
2861
+
2862
+
2863
+
2864
+
2865
+
2866
+
2867
+
2868
+
2869
+
2870
+
2871
+
2872
+ 15%|█▌ | 3024/20000 [1:31:39<8:27:44, 1.79s/it]
2873
+
2874
+
2875
+
2876
+
2877
+
2878
+
2879
+
2880
+
2881
+
2882
+
2883
+
2884
+
2885
+
2886
+
2887
+
2888
+
2889
+
2890
+
2891
+
2892
+
2893
+
2894
+
2895
+
2896
+ 15%|█▌ | 3050/20000 [1:32:26<8:26:55, 1.79s/it]
2897
+
2898
+
2899
+
2900
+
2901
+
2902
+
2903
+
2904
+
2905
+
2906
+
2907
+
2908
+
2909
+
2910
+
2911
+
2912
+
2913
+
2914
+
2915
+
2916
+
2917
+
2918
+
2919
+ 15%|█▌ | 3074/20000 [1:33:09<8:26:07, 1.79s/it]
2920
+
2921
+
2922
+
2923
+
2924
+
2925
+
2926
+
2927
+
2928
+
2929
+
2930
+
2931
+
2932
+
2933
+
2934
+
2935
+
2936
+
2937
+
2938
+
2939
+
2940
+
2941
+
2942
+
2943
+ 16%|█▌ | 3100/20000 [1:33:56<8:26:42, 1.80s/it]
2944
+
2945
+
2946
+
2947
+
2948
+
2949
+
2950
+
2951
+
2952
+
2953
+
2954
+
2955
+
2956
+
2957
+
2958
+
2959
+
2960
+
2961
+
2962
+
2963
+
2964
+
2965
+
2966
+ 16%|█▌ | 3125/20000 [1:34:41<8:25:36, 1.80s/it]
2967
+
2968
+
2969
+
2970
+
2971
+
2972
+
2973
+
2974
+
2975
+
2976
+
2977
+
2978
+
2979
+
2980
+
2981
+
2982
+
2983
+
2984
+
2985
+
2986
+
2987
+
2988
+
2989
+
2990
+ 16%|█▌ | 3150/20000 [1:35:26<8:24:38, 1.80s/it]
2991
+
2992
+
2993
+
2994
+
2995
+
2996
+
2997
+
2998
+
2999
+
3000
+
3001
+
3002
+
3003
+
3004
+
3005
+
3006
+
3007
+
3008
+
3009
+
3010
+
3011
+
3012
+
3013
+ 16%|█▌ | 3175/20000 [1:36:11<8:23:39, 1.80s/it]
3014
+
3015
+
3016
+
3017
+
3018
+
3019
+
3020
+
3021
+
3022
+
3023
+
3024
+
3025
+
3026
+
3027
+
3028
+
3029
+
3030
+
3031
+
3032
+
3033
+
3034
+
3035
+
3036
+
3037
+ 16%|█▌ | 3200/20000 [1:36:56<8:24:08, 1.80s/it]
3038
+
3039
+
3040
+
3041
+
3042
+
3043
+
3044
+
3045
+
3046
+
3047
+
3048
+
3049
+
3050
+
3051
+
3052
+
3053
+
3054
+
3055
+
3056
+
3057
+
3058
+
3059
+
3060
+ 16%|█▌ | 3225/20000 [1:37:41<8:26:18, 1.81s/it]
3061
+
3062
+
3063
+
3064
+
3065
+
3066
+
3067
+
3068
+
3069
+
3070
+
3071
+
3072
+
3073
+
3074
+
3075
+
3076
+
3077
+
3078
+
3079
+
3080
+
3081
+
3082
+
3083
+
3084
+ 16%|█▋ | 3250/20000 [1:38:26<8:21:01, 1.79s/it]
3085
+
3086
+
3087
+
3088
+
3089
+
3090
+
3091
+
3092
+
3093
+
3094
+
3095
+
3096
+
3097
+
3098
+
3099
+
3100
+
3101
+
3102
+
3103
+
3104
+
3105
+
3106
+
3107
+ 16%|█▋ | 3275/20000 [1:39:10<8:20:16, 1.79s/it]
3108
+
3109
+
3110
+
3111
+
3112
+
3113
+
3114
+
3115
+
3116
+
3117
+
3118
+
3119
+
3120
+
3121
+
3122
+
3123
+
3124
+
3125
+
3126
+
3127
+
3128
+
3129
+
3130
+ 16%|█▋ | 3300/20000 [1:39:55<8:19:56, 1.80s/it]
3131
+
3132
+
3133
+
3134
+
3135
+
3136
+
3137
+
3138
+
3139
+
3140
+
3141
+
3142
+
3143
+
3144
+
3145
+
3146
+
3147
+
3148
+
3149
+
3150
+
3151
+
3152
+
3153
+
3154
+ 17%|█▋ | 3325/20000 [1:40:40<8:19:52, 1.80s/it]
3155
+
3156
+
3157
+
3158
+
3159
+
3160
+
3161
+
3162
+
3163
+
3164
+
3165
+
3166
+
3167
+
3168
+
3169
+
3170
+
3171
+
3172
+
3173
+
3174
+
3175
+
3176
+
3177
+ 17%|█▋ | 3350/20000 [1:41:25<8:16:15, 1.79s/it]
3178
+
3179
+
3180
+
3181
+
3182
+
3183
+
3184
+
3185
+
3186
+
3187
+
3188
+
3189
+
3190
+
3191
+
3192
+
3193
+
3194
+
3195
+
3196
+
3197
+
3198
+
3199
+
3200
+
3201
+ 17%|█▋ | 3375/20000 [1:42:10<8:15:54, 1.79s/it]
3202
+
3203
+
3204
+
3205
+
3206
+
3207
+
3208
+
3209
+
3210
+
3211
+
3212
+
3213
+
3214
+
3215
+
3216
+
3217
+
3218
+
3219
+
3220
+
3221
+
3222
+
3223
+
3224
+ 17%|█▋ | 3400/20000 [1:42:55<8:15:14, 1.79s/it]
3225
+
3226
+
3227
+
3228
+
3229
+
3230
+
3231
+
3232
+
3233
+
3234
+
3235
+
3236
+
3237
+
3238
+
3239
+
3240
+
3241
+
3242
+
3243
+
3244
+
3245
+
3246
+
3247
+
3248
+ 17%|█▋ | 3425/20000 [1:43:39<8:14:08, 1.79s/it]
3249
+
3250
+
3251
+
3252
+
3253
+
3254
+
3255
+
3256
+
3257
+
3258
+
3259
+
3260
+
3261
+
3262
+
3263
+
3264
+
3265
+
3266
+
3267
+
3268
+
3269
+
3270
+
3271
+ 17%|█▋ | 3450/20000 [1:44:24<8:16:50, 1.80s/it]
3272
+
3273
+
3274
+
3275
+
3276
+
3277
+
3278
+
3279
+
3280
+
3281
+
3282
+
3283
+
3284
+
3285
+
3286
+
3287
+
3288
+
3289
+
3290
+
3291
+
3292
+
3293
+
3294
+ 17%|█▋ | 3475/20000 [1:45:09<8:15:51, 1.80s/it]
3295
+
3296
+
3297
+
3298
+
3299
+
3300
+
3301
+
3302
+
3303
+
3304
+
3305
+
3306
+
3307
+
3308
+
3309
+
3310
+
3311
+
3312
+
3313
+
3314
+
3315
+
3316
+
3317
+
3318
+ 18%|█▊ | 3500/20000 [1:45:54<8:13:44, 1.80s/it]
3319
+
3320
+
3321
+
3322
+
3323
+
3324
+
3325
+
3326
+
3327
+
3328
+
3329
+
3330
+
3331
+
3332
+
3333
+
3334
+
3335
+
3336
+
3337
+
3338
+
3339
+
3340
+
3341
+ 18%|█▊ | 3525/20000 [1:46:39<8:13:09, 1.80s/it]
3342
+
3343
+
3344
+
3345
+
3346
+
3347
+
3348
+
3349
+
3350
+
3351
+
3352
+
3353
+
3354
+
3355
+
3356
+
3357
+
3358
+
3359
+
3360
+
3361
+
3362
+
3363
+
3364
+
3365
+ 18%|█▊ | 3550/20000 [1:47:24<8:11:44, 1.79s/it]
3366
+
3367
+
3368
+
3369
+
3370
+
3371
+
3372
+
3373
+
3374
+
3375
+
3376
+
3377
+
3378
+
3379
+
3380
+
3381
+
3382
+
3383
+
3384
+
3385
+
3386
+
3387
+
3388
+ 18%|█▊ | 3575/20000 [1:48:09<8:10:58, 1.79s/it]
3389
+
3390
+
3391
+
3392
+
3393
+
3394
+
3395
+
3396
+
3397
+
3398
+
3399
+
3400
+
3401
+
3402
+
3403
+
3404
+
3405
+
3406
+
3407
+
3408
+
3409
+
3410
+
3411
+
3412
+ 18%|█▊ | 3601/20000 [1:48:55<8:10:48, 1.80s/it]
3413
+
3414
+
3415
+
3416
+
3417
+
3418
+
3419
+
3420
+
3421
+
3422
+
3423
+
3424
+
3425
+
3426
+
3427
+
3428
+
3429
+
3430
+
3431
+
3432
+
3433
+
3434
+
3435
+ 18%|█▊ | 3625/20000 [1:49:38<8:08:12, 1.79s/it]
3436
+
3437
+
3438
+
3439
+
3440
+
3441
+
3442
+
3443
+
3444
+
3445
+
3446
+
3447
+
3448
+
3449
+
3450
+
3451
+
3452
+
3453
+
3454
+
3455
+
3456
+
3457
+
3458
+ 18%|█▊ | 3650/20000 [1:50:23<8:08:08, 1.79s/it]
3459
+
3460
+
3461
+
3462
+
3463
+
3464
+
3465
+
3466
+
3467
+
3468
+
3469
+
3470
+
3471
+
3472
+
3473
+
3474
+
3475
+
3476
+
3477
+
3478
+
3479
+
3480
+
3481
+
3482
+ 18%|█▊ | 3675/20000 [1:51:08<8:07:02, 1.79s/it]
3483
+
3484
+
3485
+
3486
+
3487
+
3488
+
3489
+
3490
+
3491
+
3492
+
3493
+
3494
+
3495
+
3496
+
3497
+
3498
+
3499
+
3500
+
3501
+
3502
+
3503
+
3504
+
3505
+ 18%|█▊ | 3700/20000 [1:51:53<8:06:03, 1.79s/it]
3506
+
3507
+
3508
+
3509
+
3510
+
3511
+
3512
+
3513
+
3514
+
3515
+
3516
+
3517
+
3518
+
3519
+
3520
+
3521
+
3522
+
3523
+
3524
+
3525
+
3526
+
3527
+
3528
+ 19%|█▊ | 3724/20000 [1:52:36<8:05:20, 1.79s/it]
3529
+
3530
+
3531
+
3532
+
3533
+
3534
+
3535
+
3536
+
3537
+
3538
+
3539
+
3540
+
3541
+
3542
+
3543
+
3544
+
3545
+
3546
+
3547
+
3548
+
3549
+
3550
+
3551
+
3552
+ 19%|█▉ | 3750/20000 [1:53:22<8:04:42, 1.79s/it]
3553
+
3554
+
3555
+
3556
+
3557
+
3558
+
3559
+
3560
+
3561
+
3562
+
3563
+
3564
+
3565
+
3566
+
3567
+
3568
+
3569
+
3570
+
3571
+
3572
+
3573
+
3574
+
3575
+ 19%|█▉ | 3775/20000 [1:54:07<8:05:38, 1.80s/it]
3576
+
3577
+
3578
+
3579
+
3580
+
3581
+
3582
+
3583
+
3584
+
3585
+
3586
+
3587
+
3588
+
3589
+
3590
+
3591
+
3592
+
3593
+
3594
+
3595
+
3596
+
3597
+
3598
+
3599
+ 19%|█▉ | 3800/20000 [1:54:52<8:03:02, 1.79s/it]
3600
+
3601
+
3602
+
3603
+
3604
+
3605
+
3606
+
3607
+
3608
+
3609
+
3610
+
3611
+
3612
+
3613
+
3614
+
3615
+
3616
+
3617
+
3618
+
3619
+
3620
+
3621
+
3622
+ 19%|█▉ | 3825/20000 [1:55:37<8:02:57, 1.79s/it]
3623
+
3624
+
3625
+
3626
+
3627
+
3628
+
3629
+
3630
+
3631
+
3632
+
3633
+
3634
+
3635
+
3636
+
3637
+
3638
+
3639
+
3640
+
3641
+
3642
+
3643
+
3644
+
3645
+ 19%|█▉ | 3849/20000 [1:56:20<8:02:09, 1.79s/it]
3646
+
3647
+
3648
+
3649
+
3650
+
3651
+
3652
+
3653
+
3654
+
3655
+
3656
+
3657
+
3658
+
3659
+
3660
+
3661
+
3662
+
3663
+
3664
+
3665
+
3666
+
3667
+
3668
+
3669
+ 19%|█▉ | 3875/20000 [1:57:06<8:01:50, 1.79s/it]
3670
+
3671
+
3672
+
3673
+
3674
+
3675
+
3676
+
3677
+
3678
+
3679
+
3680
+
3681
+
3682
+
3683
+
3684
+
3685
+
3686
+
3687
+
3688
+
3689
+
3690
+
3691
+
3692
+ 20%|█▉ | 3900/20000 [1:57:51<8:01:23, 1.79s/it]
3693
+
3694
+
3695
+
3696
+
3697
+
3698
+
3699
+
3700
+
3701
+
3702
+
3703
+
3704
+
3705
+
3706
+
3707
+
3708
+
3709
+
3710
+
3711
+
3712
+
3713
+
3714
+
3715
+
3716
+ 20%|█▉ | 3925/20000 [1:58:36<8:02:26, 1.80s/it]
3717
+
3718
+
3719
+
3720
+
3721
+
3722
+
3723
+
3724
+
3725
+
3726
+
3727
+
3728
+
3729
+
3730
+
3731
+
3732
+
3733
+
3734
+
3735
+
3736
+
3737
+
3738
+
3739
+ 20%|█▉ | 3950/20000 [1:59:21<8:01:28, 1.80s/it]
3740
+
3741
+
3742
+
3743
+
3744
+
3745
+
3746
+
3747
+
3748
+
3749
+
3750
+
3751
+
3752
+
3753
+
3754
+
3755
+
3756
+
3757
+
3758
+
3759
+
3760
+
3761
+
3762
+ 20%|█▉ | 3975/20000 [2:00:05<7:57:29, 1.79s/it]
3763
+
3764
+
3765
+
3766
+
3767
+
3768
+
3769
+
3770
+
3771
+
3772
+
3773
+
3774
+
3775
+
3776
+
3777
+
3778
+
3779
+
3780
+
3781
+
3782
+
3783
+
3784
+
3785
+
3786
+ 20%|██ | 4000/20000 [2:00:50<7:58:23, 1.79s/it]
3787
+
3788
+
3789
+
3790
+
3791
+
3792
+
3793
+
3794
+
3795
+
3796
+
3797
+
3798
+
3799
+
3800
+
3801
+
3802
+
3803
+
3804
+
3805
+
3806
+
3807
+
3808
+
3809
+ 20%|██ | 4025/20000 [2:01:35<7:57:08, 1.79s/it]
3810
+
3811
+
3812
+
3813
+
3814
+
3815
+
3816
+
3817
+
3818
+
3819
+
3820
+
3821
+
3822
+
3823
+
3824
+
3825
+
3826
+
3827
+
3828
+
3829
+
3830
+
3831
+
3832
+
3833
+ 20%|██ | 4050/20000 [2:02:20<7:52:32, 1.78s/it]
3834
+
3835
+
3836
+
3837
+
3838
+
3839
+
3840
+
3841
+
3842
+
3843
+
3844
+
3845
+
3846
+
3847
+
3848
+
3849
+
3850
+
3851
+
3852
+
3853
+
3854
+
3855
+
3856
+ 20%|██ | 4075/20000 [2:03:04<7:50:57, 1.77s/it]
3857
+
3858
+
3859
+
3860
+
3861
+
3862
+
3863
+
3864
+
3865
+
3866
+
3867
+
3868
+
3869
+
3870
+
3871
+
3872
+
3873
+
3874
+
3875
+
3876
+
3877
+
3878
+
3879
+ 20%|██ | 4100/20000 [2:03:49<7:49:25, 1.77s/it]
3880
+
3881
+
3882
+
3883
+
3884
+
3885
+
3886
+
3887
+
3888
+
3889
+
3890
+
3891
+
3892
+
3893
+
3894
+
3895
+
3896
+
3897
+
3898
+
3899
+
3900
+
3901
+
3902
+ 21%|██ | 4125/20000 [2:04:33<7:49:30, 1.77s/it]
3903
+
3904
+
3905
+
3906
+
3907
+
3908
+
3909
+
3910
+
3911
+
3912
+
3913
+
3914
+
3915
+
3916
+
3917
+
3918
+
3919
+
3920
+
3921
+
3922
+
3923
+
3924
+
3925
+
3926
+ 21%|██ | 4151/20000 [2:05:20<7:49:47, 1.78s/it]
3927
+
3928
+
3929
+
3930
+
3931
+
3932
+
3933
+
3934
+
3935
+
3936
+
3937
+
3938
+
3939
+
3940
+
3941
+
3942
+
3943
+
3944
+
3945
+
3946
+
3947
+
3948
+
3949
+ 21%|██ | 4175/20000 [2:06:02<7:48:55, 1.78s/it]
3950
+
3951
+
3952
+
3953
+
3954
+
3955
+
3956
+
3957
+
3958
+
3959
+
3960
+
3961
+
3962
+
3963
+
3964
+
3965
+
3966
+
3967
+
3968
+
3969
+
3970
+
3971
+
3972
+ 21%|██ | 4200/20000 [2:06:46<7:47:51, 1.78s/it]
3973
+
3974
+
3975
+
3976
+
3977
+
3978
+
3979
+
3980
+
3981
+
3982
+
3983
+
3984
+
3985
+
3986
+
3987
+
3988
+
3989
+
3990
+
3991
+
3992
+
3993
+
3994
+
3995
+ 21%|██ | 4225/20000 [2:07:31<7:48:05, 1.78s/it]
3996
+
3997
+
3998
+
3999
+
4000
+
4001
+
4002
+
4003
+
4004
+
4005
+
4006
+
4007
+
4008
+
4009
+
4010
+
4011
+
4012
+
4013
+
4014
+
4015
+
4016
+
4017
+
4018
+ 21%|██▏ | 4250/20000 [2:08:15<7:46:36, 1.78s/it]
4019
+
4020
+
4021
+
4022
+
4023
+
4024
+
4025
+
4026
+
4027
+
4028
+
4029
+
4030
+
4031
+
4032
+
4033
+
4034
+
4035
+
4036
+
4037
+
4038
+
4039
+
4040
+
4041
+
4042
+ 21%|██▏ | 4276/20000 [2:09:01<7:47:01, 1.78s/it]
4043
+
4044
+
4045
+
4046
+
4047
+
4048
+
4049
+
4050
+
4051
+
4052
+
4053
+
4054
+
4055
+
4056
+
4057
+
4058
+
4059
+
4060
+
4061
+
4062
+
4063
+
4064
+
4065
+ 22%|██▏ | 4300/20000 [2:09:44<7:44:39, 1.78s/it]
4066
+
4067
+
4068
+
4069
+
4070
+
4071
+
4072
+
4073
+
4074
+
4075
+
4076
+
4077
+
4078
+
4079
+
4080
+
4081
+
4082
+
4083
+
4084
+
4085
+
4086
+
4087
+
4088
+ 22%|██▏ | 4325/20000 [2:10:28<7:42:26, 1.77s/it]
4089
+
4090
+
4091
+
4092
+
4093
+
4094
+
4095
+
4096
+
4097
+
4098
+
4099
+
4100
+
4101
+
4102
+
4103
+
4104
+
4105
+
4106
+
4107
+
4108
+
4109
+
4110
+
4111
+ 22%|██▏ | 4350/20000 [2:11:13<7:45:20, 1.78s/it]
4112
+
4113
+
4114
+
4115
+
4116
+
4117
+
4118
+
4119
+
4120
+
4121
+
4122
+
4123
+
4124
+
4125
+
4126
+
4127
+
4128
+
4129
+
4130
+
4131
+
4132
+
4133
+
4134
+ 22%|██▏ | 4375/20000 [2:11:57<7:42:02, 1.77s/it]
4135
+
4136
+
4137
+
4138
+
4139
+
4140
+
4141
+
4142
+
4143
+
4144
+
4145
+
4146
+
4147
+
4148
+
4149
+
4150
+
4151
+
4152
+
4153
+
4154
+
4155
+
4156
+
4157
+
4158
+
4159
+ 22%|██▏ | 4400/20000 [2:12:53<8:20:58, 1.93s/it]
4160
+
4161
+
4162
+
4163
+
4164
+
4165
+
4166
+
4167
+
4168
+
4169
+
4170
+
4171
+
4172
+
4173
+
4174
+
4175
+
4176
+
4177
+
4178
+
4179
+
4180
+
4181
+
4182
+
4183
+
4184
+ 22%|██▏ | 4425/20000 [2:13:41<8:12:52, 1.90s/it]
4185
+
4186
+
4187
+
4188
+
4189
+
4190
+
4191
+
4192
+
4193
+
4194
+
4195
+
4196
+
4197
+
4198
+
4199
+
4200
+
4201
+
4202
+
4203
+
4204
+
4205
+
4206
+
4207
+
4208
+
4209
+ 22%|██▏ | 4450/20000 [2:14:28<8:08:10, 1.88s/it]
4210
+
4211
+
4212
+
4213
+
4214
+
4215
+
4216
+
4217
+
4218
+
4219
+
4220
+
4221
+
4222
+
4223
+
4224
+
4225
+
4226
+
4227
+
4228
+
4229
+
4230
+
4231
+
4232
+
4233
+ 22%|██▏ | 4475/20000 [2:15:15<8:03:07, 1.87s/it]
4234
+
4235
+
4236
+
4237
+
4238
+
4239
+
4240
+
4241
+
4242
+
4243
+
4244
+
4245
+
4246
+
4247
+
4248
+
4249
+
4250
+
4251
+
4252
+
4253
+
4254
+
4255
+
4256
+
4257
+
4258
+ 22%|██▎ | 4500/20000 [2:16:02<8:00:51, 1.86s/it]
4259
+
4260
+
4261
+
4262
+
4263
+
4264
+
4265
+
4266
+
4267
+
4268
+
4269
+
4270
+
4271
+
4272
+
4273
+
4274
+
4275
+
4276
+
4277
+
4278
+
4279
+
4280
+
4281
+
4282
+ 23%|██▎ | 4525/20000 [2:16:49<7:58:44, 1.86s/it]
4283
+
4284
+
4285
+
4286
+
4287
+
4288
+
4289
+
4290
+
4291
+
4292
+
4293
+
4294
+
4295
+
4296
+
4297
+
4298
+
4299
+
4300
+
4301
+
4302
+
4303
+
4304
+
4305
+
4306
+ 23%|██▎ | 4550/20000 [2:17:35<7:55:23, 1.85s/it]
4307
+
4308
+
4309
+
4310
+
4311
+
4312
+
4313
+
4314
+
4315
+
4316
+
4317
+
4318
+
4319
+
4320
+
4321
+
4322
+
4323
+
4324
+
4325
+
4326
+
4327
+
4328
+
4329
+
4330
+ 23%|██▎ | 4575/20000 [2:18:21<7:54:23, 1.85s/it]
4331
+
4332
+
4333
+
4334
+
4335
+
4336
+
4337
+
4338
+
4339
+
4340
+
4341
+
4342
+
4343
+
4344
+
4345
+
4346
+
4347
+
4348
+
4349
+
4350
+
4351
+
4352
+
4353
+
4354
+ 23%|██▎ | 4600/20000 [2:19:07<7:53:46, 1.85s/it]
4355
+
4356
+
4357
+
4358
+
4359
+
4360
+
4361
+
4362
+
4363
+
4364
+
4365
+
4366
+
4367
+
4368
+
4369
+
4370
+
4371
+
4372
+
4373
+
4374
+
4375
+
4376
+
4377
+
4378
+ 23%|██▎ | 4625/20000 [2:19:53<7:52:15, 1.84s/it]
4379
+
4380
+
4381
+
4382
+
4383
+
4384
+
4385
+
4386
+
4387
+
4388
+
4389
+
4390
+
4391
+
4392
+
4393
+
4394
+
4395
+
4396
+
4397
+
4398
+
4399
+
4400
+
4401
+
4402
+ 23%|██▎ | 4650/20000 [2:20:39<7:46:37, 1.82s/it]
4403
+
4404
+
4405
+
4406
+
4407
+
4408
+
4409
+
4410
+
4411
+
4412
+
4413
+
4414
+
4415
+
4416
+
4417
+
4418
+
4419
+
4420
+
4421
+
4422
+
4423
+
4424
+
4425
+
4426
+ 23%|██▎ | 4675/20000 [2:21:25<7:49:55, 1.84s/it]
4427
+
4428
+
4429
+
4430
+
4431
+
4432
+
4433
+
4434
+
4435
+
4436
+
4437
+
4438
+
4439
+
4440
+
4441
+
4442
+
4443
+
4444
+
4445
+
4446
+
4447
+
4448
+
4449
+ 23%|██▎ | 4699/20000 [2:22:09<7:46:33, 1.83s/it]
4450
+
4451
+
4452
+
4453
+
4454
+
4455
+
4456
+
4457
+
4458
+
4459
+
4460
+
4461
+
4462
+
4463
+
4464
+
4465
+
4466
+
4467
+
4468
+
4469
+
4470
+
4471
+
4472
+
4473
+ 24%|██▎ | 4724/20000 [2:22:54<7:42:44, 1.82s/it]
4474
+
4475
+
4476
+
4477
+
4478
+
4479
+
4480
+
4481
+
4482
+
4483
+
4484
+
4485
+
4486
+
4487
+
4488
+
4489
+
4490
+
4491
+
4492
+
4493
+
4494
+
4495
+
4496
+ 24%|██▎ | 4749/20000 [2:23:40<7:41:55, 1.82s/it]
4497
+
4498
+
4499
+
4500
+
4501
+
4502
+
4503
+
4504
+
4505
+
4506
+
4507
+
4508
+
4509
+
4510
+
4511
+
4512
+
4513
+
4514
+
4515
+
4516
+
4517
+
4518
+
4519
+
4520
+ 24%|██▍ | 4774/20000 [2:24:25<7:41:36, 1.82s/it]
4521
+
4522
+
4523
+
4524
+
4525
+
4526
+
4527
+
4528
+
4529
+
4530
+
4531
+
4532
+
4533
+
4534
+
4535
+
4536
+
4537
+
4538
+
4539
+
4540
+
4541
+
4542
+
4543
+
4544
+ 24%|██▍ | 4799/20000 [2:25:11<7:40:55, 1.82s/it]
4545
+
4546
+
4547
+
4548
+
4549
+
4550
+
4551
+
4552
+
4553
+
4554
+
4555
+
4556
+
4557
+
4558
+
4559
+
4560
+
4561
+
4562
+
4563
+
4564
+
4565
+
4566
+
4567
+
4568
+ 24%|██▍ | 4824/20000 [2:25:56<7:39:18, 1.82s/it]
4569
+
4570
+
4571
+
4572
+
4573
+
4574
+
4575
+
4576
+
4577
+
4578
+
4579
+
4580
+
4581
+
4582
+
4583
+
4584
+
4585
+
4586
+
4587
+
4588
+
4589
+
4590
+
4591
+ 24%|██▍ | 4849/20000 [2:26:42<7:41:13, 1.83s/it]
4592
+
4593
+
4594
+
4595
+
4596
+
4597
+
4598
+
4599
+
4600
+
4601
+
4602
+
4603
+
4604
+
4605
+
4606
+
4607
+
4608
+
4609
+
4610
+
4611
+
4612
+
4613
+
4614
+
4615
+ 24%|██▍ | 4874/20000 [2:27:28<7:44:53, 1.84s/it]
4616
+
4617
+
4618
+
4619
+
4620
+
4621
+
4622
+
4623
+
4624
+
4625
+
4626
+
4627
+
4628
+
4629
+
4630
+
4631
+
4632
+
4633
+
4634
+
4635
+
4636
+
4637
+
4638
+
4639
+ 24%|██▍ | 4899/20000 [2:28:14<7:42:04, 1.84s/it]
4640
+
4641
+
4642
+
4643
+
4644
+
4645
+
4646
+
4647
+
4648
+
4649
+
4650
+
4651
+
4652
+
4653
+
4654
+
4655
+
4656
+
4657
+
4658
+
4659
+
4660
+
4661
+
4662
+
4663
+ 25%|██▍ | 4924/20000 [2:28:59<7:39:16, 1.83s/it]
4664
+
4665
+
4666
+
4667
+
4668
+
4669
+
4670
+
4671
+
4672
+
4673
+
4674
+
4675
+
4676
+
4677
+
4678
+
4679
+
4680
+
4681
+
4682
+
4683
+
4684
+
4685
+
4686
+
4687
+ 25%|██▍ | 4949/20000 [2:29:45<7:37:43, 1.82s/it]
4688
+
4689
+
4690
+
4691
+
4692
+
4693
+
4694
+
4695
+
4696
+
4697
+
4698
+
4699
+
4700
+
4701
+
4702
+
4703
+
4704
+
4705
+
4706
+
4707
+
4708
+
4709
+
4710
+
4711
+ 25%|██▍ | 4974/20000 [2:30:30<7:36:47, 1.82s/it]
4712
+
4713
+
4714
+
4715
+
4716
+
4717
+
4718
+
4719
+
4720
+
4721
+
4722
+
4723
+
4724
+
4725
+
4726
+
4727
+
4728
+
4729
+
4730
+
4731
+
4732
+
4733
+
4734
+ 25%|██▌ | 5000/20000 [2:31:18<7:35:14, 1.82s/it][INFO|trainer.py:3614] 2024-04-25 16:16:41,164 >> ***** Running Evaluation *****
4735
+ [INFO|trainer.py:3616] 2024-04-25 16:16:41,164 >> Num examples = 639
4736
+ [INFO|trainer.py:3619] 2024-04-25 16:16:41,164 >> Batch size = 32
4737
+ {'loss': 1.135, 'grad_norm': 0.7578125, 'learning_rate': 7.692307692307693e-05, 'epoch': 1.24}
4738
+ {'eval_loss': 1.048352837562561, 'eval_runtime': 1.5386, 'eval_samples_per_second': 415.323, 'eval_steps_per_second': 1.95, 'epoch': 1.24}
4739
+ 25%|██▌ | 5000/20000 [2:31:19<7:35:14,[INFO|trainer.py:3305] 2024-04-25 16:16:42,715 >> Saving model checkpoint to ./checkpoint-5000
4740
+ [INFO|configuration_utils.py:471] 2024-04-25 16:16:42,718 >> Configuration saved in ./checkpoint-5000/config.json
4741
+ [INFO|configuration_utils.py:697] 2024-04-25 16:16:42,726 >> Configuration saved in ./checkpoint-5000/generation_config.json
4742
+ [INFO|modeling_utils.py:2590] 2024-04-25 16:16:46,709 >> Model weights saved in ./checkpoint-5000/model.safetensors
4743
+ [INFO|tokenization_utils_base.py:2488] 2024-04-25 16:16:46,715 >> tokenizer config file saved in ./checkpoint-5000/tokenizer_config.json
4744
+ [INFO|tokenization_utils_base.py:2497] 2024-04-25 16:16:46,716 >> Special tokens file saved in ./checkpoint-5000/special_tokens_map.json
4745
+ [INFO|tokenization_utils_base.py:2488] 2024-04-25 16:16:56,471 >> tokenizer config file saved in ./tokenizer_config.json
4746
+ [INFO|tokenization_utils_base.py:2497] 2024-04-25 16:16:56,473 >> Special tokens file saved in ./special_tokens_map.json
4747
+
4748
+
4749
+
4750
+
4751
+
4752
+
4753
+
4754
+
4755
+
4756
+
4757
+
4758
+
4759
+
4760
+
4761
+
4762
+
4763
+
4764
+
4765
+
4766
+
4767
+
4768
+ 25%|██▌ | 5024/20000 [2:32:17<7:35:07, 1.82s/it]
4769
+
4770
+
4771
+
4772
+
wandb/run-20240425_134518-etajcxpg/files/requirements.txt ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GitPython==3.1.43
2
+ Jinja2==3.1.3
3
+ Markdown==3.6
4
+ MarkupSafe==2.1.5
5
+ PyYAML==6.0.1
6
+ Pygments==2.17.2
7
+ Werkzeug==3.0.2
8
+ absl-py==2.1.0
9
+ accelerate==0.29.3
10
+ aiohttp==3.9.5
11
+ aiosignal==1.3.1
12
+ alignment-handbook==0.4.0.dev0
13
+ annotated-types==0.6.0
14
+ appdirs==1.4.4
15
+ attrs==23.2.0
16
+ bitsandbytes==0.43.1
17
+ certifi==2024.2.2
18
+ charset-normalizer==3.3.2
19
+ click==8.1.7
20
+ datasets==2.19.0
21
+ deepspeed==0.14.2
22
+ dill==0.3.8
23
+ docker-pycreds==0.4.0
24
+ docstring_parser==0.16
25
+ einops==0.7.0
26
+ evaluate==0.4.1
27
+ filelock==3.13.4
28
+ frozenlist==1.4.1
29
+ fsspec==2024.3.1
30
+ gitdb==4.0.11
31
+ grpcio==1.62.2
32
+ hf_transfer==0.1.6
33
+ hjson==3.1.0
34
+ huggingface-hub==0.22.2
35
+ idna==3.7
36
+ markdown-it-py==3.0.0
37
+ mdurl==0.1.2
38
+ mpmath==1.3.0
39
+ multidict==6.0.5
40
+ multiprocess==0.70.16
41
+ networkx==3.3
42
+ ninja==1.11.1.1
43
+ numpy==1.26.4
44
+ nvidia-cublas-cu12==12.1.3.1
45
+ nvidia-cuda-cupti-cu12==12.1.105
46
+ nvidia-cuda-nvrtc-cu12==12.1.105
47
+ nvidia-cuda-runtime-cu12==12.1.105
48
+ nvidia-cudnn-cu12==8.9.2.26
49
+ nvidia-cufft-cu12==11.0.2.54
50
+ nvidia-curand-cu12==10.3.2.106
51
+ nvidia-cusolver-cu12==11.4.5.107
52
+ nvidia-cusparse-cu12==12.1.0.106
53
+ nvidia-nccl-cu12==2.19.3
54
+ nvidia-nvjitlink-cu12==12.4.127
55
+ nvidia-nvtx-cu12==12.1.105
56
+ packaging==24.0
57
+ pandas==2.2.2
58
+ peft==0.10.0
59
+ pillow==10.3.0
60
+ pip==23.3.1
61
+ protobuf==3.20.2
62
+ psutil==5.9.8
63
+ py-cpuinfo==9.0.0
64
+ pyarrow-hotfix==0.6
65
+ pyarrow==16.0.0
66
+ pydantic==2.7.1
67
+ pydantic_core==2.18.2
68
+ pynvml==11.5.0
69
+ python-dateutil==2.9.0.post0
70
+ pytz==2024.1
71
+ regex==2024.4.16
72
+ requests==2.31.0
73
+ responses==0.18.0
74
+ rich==13.7.1
75
+ safetensors==0.4.3
76
+ scipy==1.13.0
77
+ sentencepiece==0.2.0
78
+ sentry-sdk==2.0.0
79
+ setproctitle==1.3.3
80
+ setuptools==68.2.2
81
+ shtab==1.7.1
82
+ six==1.16.0
83
+ smmap==5.0.1
84
+ sympy==1.12
85
+ tensorboard-data-server==0.7.2
86
+ tensorboard==2.16.2
87
+ tokenizers==0.19.1
88
+ torch==2.2.2
89
+ torchaudio==2.2.2
90
+ torchvision==0.17.2
91
+ tqdm==4.66.2
92
+ transformers==4.40.1
93
+ triton==2.2.0
94
+ trl==0.8.6
95
+ typing_extensions==4.11.0
96
+ tyro==0.8.3
97
+ tzdata==2024.1
98
+ urllib3==2.2.1
99
+ wandb==0.16.6
100
+ wheel==0.41.2
101
+ xxhash==3.4.1
102
+ yarl==1.9.4
wandb/run-20240425_134518-etajcxpg/files/wandb-metadata.json ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-1048-aws-x86_64-with-glibc2.31",
3
+ "python": "3.11.9",
4
+ "heartbeatAt": "2024-04-25T13:45:18.674598",
5
+ "startedAt": "2024-04-25T13:45:18.212334",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "./config_full.yaml"
10
+ ],
11
+ "state": "running",
12
+ "program": "/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py",
13
+ "codePathLocal": "run_sft.py",
14
+ "codePath": "run_sft.py",
15
+ "git": {
16
+ "remote": "https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft-ultrachat",
17
+ "commit": "cbea69c6b95c970317a1e47c3f614b55b33f8ed9"
18
+ },
19
+ "email": null,
20
+ "root": "/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat",
21
+ "host": "ip-26-0-167-177",
22
+ "username": "sanchit",
23
+ "executable": "/fsx/sanchit/miniconda3/envs/alignment/bin/python",
24
+ "cpu_count": 96,
25
+ "cpu_count_logical": 96,
26
+ "cpu_freq": {
27
+ "current": 2725.4579687499986,
28
+ "min": 0.0,
29
+ "max": 0.0
30
+ },
31
+ "cpu_freq_per_core": [
32
+ {
33
+ "current": 2649.998,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 2649.998,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 3597.164,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 2649.998,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 2649.998,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 2649.998,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ },
62
+ {
63
+ "current": 2649.998,
64
+ "min": 0.0,
65
+ "max": 0.0
66
+ },
67
+ {
68
+ "current": 2649.998,
69
+ "min": 0.0,
70
+ "max": 0.0
71
+ },
72
+ {
73
+ "current": 2649.998,
74
+ "min": 0.0,
75
+ "max": 0.0
76
+ },
77
+ {
78
+ "current": 2649.998,
79
+ "min": 0.0,
80
+ "max": 0.0
81
+ },
82
+ {
83
+ "current": 2649.998,
84
+ "min": 0.0,
85
+ "max": 0.0
86
+ },
87
+ {
88
+ "current": 2649.998,
89
+ "min": 0.0,
90
+ "max": 0.0
91
+ },
92
+ {
93
+ "current": 2649.998,
94
+ "min": 0.0,
95
+ "max": 0.0
96
+ },
97
+ {
98
+ "current": 2649.998,
99
+ "min": 0.0,
100
+ "max": 0.0
101
+ },
102
+ {
103
+ "current": 2649.998,
104
+ "min": 0.0,
105
+ "max": 0.0
106
+ },
107
+ {
108
+ "current": 2649.998,
109
+ "min": 0.0,
110
+ "max": 0.0
111
+ },
112
+ {
113
+ "current": 2649.998,
114
+ "min": 0.0,
115
+ "max": 0.0
116
+ },
117
+ {
118
+ "current": 2649.998,
119
+ "min": 0.0,
120
+ "max": 0.0
121
+ },
122
+ {
123
+ "current": 2649.998,
124
+ "min": 0.0,
125
+ "max": 0.0
126
+ },
127
+ {
128
+ "current": 2649.998,
129
+ "min": 0.0,
130
+ "max": 0.0
131
+ },
132
+ {
133
+ "current": 2649.998,
134
+ "min": 0.0,
135
+ "max": 0.0
136
+ },
137
+ {
138
+ "current": 2649.998,
139
+ "min": 0.0,
140
+ "max": 0.0
141
+ },
142
+ {
143
+ "current": 2649.998,
144
+ "min": 0.0,
145
+ "max": 0.0
146
+ },
147
+ {
148
+ "current": 2649.998,
149
+ "min": 0.0,
150
+ "max": 0.0
151
+ },
152
+ {
153
+ "current": 2649.998,
154
+ "min": 0.0,
155
+ "max": 0.0
156
+ },
157
+ {
158
+ "current": 2649.998,
159
+ "min": 0.0,
160
+ "max": 0.0
161
+ },
162
+ {
163
+ "current": 2649.998,
164
+ "min": 0.0,
165
+ "max": 0.0
166
+ },
167
+ {
168
+ "current": 2649.998,
169
+ "min": 0.0,
170
+ "max": 0.0
171
+ },
172
+ {
173
+ "current": 2649.998,
174
+ "min": 0.0,
175
+ "max": 0.0
176
+ },
177
+ {
178
+ "current": 2649.998,
179
+ "min": 0.0,
180
+ "max": 0.0
181
+ },
182
+ {
183
+ "current": 2649.998,
184
+ "min": 0.0,
185
+ "max": 0.0
186
+ },
187
+ {
188
+ "current": 2649.998,
189
+ "min": 0.0,
190
+ "max": 0.0
191
+ },
192
+ {
193
+ "current": 2649.998,
194
+ "min": 0.0,
195
+ "max": 0.0
196
+ },
197
+ {
198
+ "current": 2649.998,
199
+ "min": 0.0,
200
+ "max": 0.0
201
+ },
202
+ {
203
+ "current": 2649.998,
204
+ "min": 0.0,
205
+ "max": 0.0
206
+ },
207
+ {
208
+ "current": 2649.998,
209
+ "min": 0.0,
210
+ "max": 0.0
211
+ },
212
+ {
213
+ "current": 2649.998,
214
+ "min": 0.0,
215
+ "max": 0.0
216
+ },
217
+ {
218
+ "current": 2649.998,
219
+ "min": 0.0,
220
+ "max": 0.0
221
+ },
222
+ {
223
+ "current": 2649.998,
224
+ "min": 0.0,
225
+ "max": 0.0
226
+ },
227
+ {
228
+ "current": 2649.998,
229
+ "min": 0.0,
230
+ "max": 0.0
231
+ },
232
+ {
233
+ "current": 2649.998,
234
+ "min": 0.0,
235
+ "max": 0.0
236
+ },
237
+ {
238
+ "current": 2649.998,
239
+ "min": 0.0,
240
+ "max": 0.0
241
+ },
242
+ {
243
+ "current": 2649.998,
244
+ "min": 0.0,
245
+ "max": 0.0
246
+ },
247
+ {
248
+ "current": 2649.998,
249
+ "min": 0.0,
250
+ "max": 0.0
251
+ },
252
+ {
253
+ "current": 2649.998,
254
+ "min": 0.0,
255
+ "max": 0.0
256
+ },
257
+ {
258
+ "current": 2649.998,
259
+ "min": 0.0,
260
+ "max": 0.0
261
+ },
262
+ {
263
+ "current": 2649.998,
264
+ "min": 0.0,
265
+ "max": 0.0
266
+ },
267
+ {
268
+ "current": 2649.998,
269
+ "min": 0.0,
270
+ "max": 0.0
271
+ },
272
+ {
273
+ "current": 2649.998,
274
+ "min": 0.0,
275
+ "max": 0.0
276
+ },
277
+ {
278
+ "current": 2649.998,
279
+ "min": 0.0,
280
+ "max": 0.0
281
+ },
282
+ {
283
+ "current": 2649.998,
284
+ "min": 0.0,
285
+ "max": 0.0
286
+ },
287
+ {
288
+ "current": 3597.955,
289
+ "min": 0.0,
290
+ "max": 0.0
291
+ },
292
+ {
293
+ "current": 2649.998,
294
+ "min": 0.0,
295
+ "max": 0.0
296
+ },
297
+ {
298
+ "current": 2649.998,
299
+ "min": 0.0,
300
+ "max": 0.0
301
+ },
302
+ {
303
+ "current": 2649.998,
304
+ "min": 0.0,
305
+ "max": 0.0
306
+ },
307
+ {
308
+ "current": 2649.998,
309
+ "min": 0.0,
310
+ "max": 0.0
311
+ },
312
+ {
313
+ "current": 2649.998,
314
+ "min": 0.0,
315
+ "max": 0.0
316
+ },
317
+ {
318
+ "current": 2649.998,
319
+ "min": 0.0,
320
+ "max": 0.0
321
+ },
322
+ {
323
+ "current": 2649.998,
324
+ "min": 0.0,
325
+ "max": 0.0
326
+ },
327
+ {
328
+ "current": 2649.998,
329
+ "min": 0.0,
330
+ "max": 0.0
331
+ },
332
+ {
333
+ "current": 2649.998,
334
+ "min": 0.0,
335
+ "max": 0.0
336
+ },
337
+ {
338
+ "current": 2649.998,
339
+ "min": 0.0,
340
+ "max": 0.0
341
+ },
342
+ {
343
+ "current": 2649.998,
344
+ "min": 0.0,
345
+ "max": 0.0
346
+ },
347
+ {
348
+ "current": 3596.13,
349
+ "min": 0.0,
350
+ "max": 0.0
351
+ },
352
+ {
353
+ "current": 2649.998,
354
+ "min": 0.0,
355
+ "max": 0.0
356
+ },
357
+ {
358
+ "current": 2649.998,
359
+ "min": 0.0,
360
+ "max": 0.0
361
+ },
362
+ {
363
+ "current": 2649.998,
364
+ "min": 0.0,
365
+ "max": 0.0
366
+ },
367
+ {
368
+ "current": 2649.998,
369
+ "min": 0.0,
370
+ "max": 0.0
371
+ },
372
+ {
373
+ "current": 3597.91,
374
+ "min": 0.0,
375
+ "max": 0.0
376
+ },
377
+ {
378
+ "current": 2649.998,
379
+ "min": 0.0,
380
+ "max": 0.0
381
+ },
382
+ {
383
+ "current": 2649.998,
384
+ "min": 0.0,
385
+ "max": 0.0
386
+ },
387
+ {
388
+ "current": 2649.998,
389
+ "min": 0.0,
390
+ "max": 0.0
391
+ },
392
+ {
393
+ "current": 2649.998,
394
+ "min": 0.0,
395
+ "max": 0.0
396
+ },
397
+ {
398
+ "current": 2649.998,
399
+ "min": 0.0,
400
+ "max": 0.0
401
+ },
402
+ {
403
+ "current": 2649.998,
404
+ "min": 0.0,
405
+ "max": 0.0
406
+ },
407
+ {
408
+ "current": 2649.998,
409
+ "min": 0.0,
410
+ "max": 0.0
411
+ },
412
+ {
413
+ "current": 2649.998,
414
+ "min": 0.0,
415
+ "max": 0.0
416
+ },
417
+ {
418
+ "current": 2649.998,
419
+ "min": 0.0,
420
+ "max": 0.0
421
+ },
422
+ {
423
+ "current": 2649.998,
424
+ "min": 0.0,
425
+ "max": 0.0
426
+ },
427
+ {
428
+ "current": 3592.057,
429
+ "min": 0.0,
430
+ "max": 0.0
431
+ },
432
+ {
433
+ "current": 2649.998,
434
+ "min": 0.0,
435
+ "max": 0.0
436
+ },
437
+ {
438
+ "current": 2649.998,
439
+ "min": 0.0,
440
+ "max": 0.0
441
+ },
442
+ {
443
+ "current": 2649.998,
444
+ "min": 0.0,
445
+ "max": 0.0
446
+ },
447
+ {
448
+ "current": 2649.998,
449
+ "min": 0.0,
450
+ "max": 0.0
451
+ },
452
+ {
453
+ "current": 2649.998,
454
+ "min": 0.0,
455
+ "max": 0.0
456
+ },
457
+ {
458
+ "current": 3267.919,
459
+ "min": 0.0,
460
+ "max": 0.0
461
+ },
462
+ {
463
+ "current": 2649.998,
464
+ "min": 0.0,
465
+ "max": 0.0
466
+ },
467
+ {
468
+ "current": 3597.091,
469
+ "min": 0.0,
470
+ "max": 0.0
471
+ },
472
+ {
473
+ "current": 2649.998,
474
+ "min": 0.0,
475
+ "max": 0.0
476
+ },
477
+ {
478
+ "current": 2649.998,
479
+ "min": 0.0,
480
+ "max": 0.0
481
+ },
482
+ {
483
+ "current": 2649.998,
484
+ "min": 0.0,
485
+ "max": 0.0
486
+ },
487
+ {
488
+ "current": 2649.998,
489
+ "min": 0.0,
490
+ "max": 0.0
491
+ },
492
+ {
493
+ "current": 3597.915,
494
+ "min": 0.0,
495
+ "max": 0.0
496
+ },
497
+ {
498
+ "current": 2649.998,
499
+ "min": 0.0,
500
+ "max": 0.0
501
+ },
502
+ {
503
+ "current": 2649.998,
504
+ "min": 0.0,
505
+ "max": 0.0
506
+ },
507
+ {
508
+ "current": 2649.998,
509
+ "min": 0.0,
510
+ "max": 0.0
511
+ }
512
+ ],
513
+ "disk": {
514
+ "/": {
515
+ "total": 290.7472343444824,
516
+ "used": 58.59657669067383
517
+ }
518
+ },
519
+ "gpu": "NVIDIA H100 80GB HBM3",
520
+ "gpu_count": 8,
521
+ "gpu_devices": [
522
+ {
523
+ "name": "NVIDIA H100 80GB HBM3",
524
+ "memory_total": 85520809984
525
+ },
526
+ {
527
+ "name": "NVIDIA H100 80GB HBM3",
528
+ "memory_total": 85520809984
529
+ },
530
+ {
531
+ "name": "NVIDIA H100 80GB HBM3",
532
+ "memory_total": 85520809984
533
+ },
534
+ {
535
+ "name": "NVIDIA H100 80GB HBM3",
536
+ "memory_total": 85520809984
537
+ },
538
+ {
539
+ "name": "NVIDIA H100 80GB HBM3",
540
+ "memory_total": 85520809984
541
+ },
542
+ {
543
+ "name": "NVIDIA H100 80GB HBM3",
544
+ "memory_total": 85520809984
545
+ },
546
+ {
547
+ "name": "NVIDIA H100 80GB HBM3",
548
+ "memory_total": 85520809984
549
+ },
550
+ {
551
+ "name": "NVIDIA H100 80GB HBM3",
552
+ "memory_total": 85520809984
553
+ }
554
+ ],
555
+ "memory": {
556
+ "total": 1999.9855155944824
557
+ }
558
+ }
wandb/run-20240425_134518-etajcxpg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 1.1348, "train/grad_norm": 0.8203125, "train/learning_rate": 7.67948717948718e-05, "train/epoch": 1.242274412855377, "train/global_step": 5025, "_timestamp": 1714061862.1020992, "_runtime": 9143.862290143967, "_step": 202, "eval/loss": 1.048352837562561, "eval/runtime": 1.5386, "eval/samples_per_second": 415.323, "eval/steps_per_second": 1.95}
wandb/run-20240425_134518-etajcxpg/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240425_134518-etajcxpg/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
2
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Configure stats pid to 156194
3
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
4
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/settings
5
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-04-25 13:45:18,225 INFO MainThread:156194 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/./run_sft.py'}
8
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:_log_setup():521] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/logs/debug.log
10
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:_log_setup():522] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft-ultrachat/wandb/run-20240425_134518-etajcxpg/logs/debug-internal.log
11
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():561] calling init triggers
12
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():611] starting backend
15
+ 2024-04-25 13:45:18,226 INFO MainThread:156194 [wandb_init.py:init():615] setting up manager
16
+ 2024-04-25 13:45:18,231 INFO MainThread:156194 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-04-25 13:45:18,239 INFO MainThread:156194 [wandb_init.py:init():623] backend started and connected
18
+ 2024-04-25 13:45:18,241 INFO MainThread:156194 [wandb_init.py:init():715] updated telemetry
19
+ 2024-04-25 13:45:18,267 INFO MainThread:156194 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
20
+ 2024-04-25 13:45:18,506 INFO MainThread:156194 [wandb_run.py:_on_init():2357] communicating current version
21
+ 2024-04-25 13:45:18,558 INFO MainThread:156194 [wandb_run.py:_on_init():2366] got version response
22
+ 2024-04-25 13:45:18,558 INFO MainThread:156194 [wandb_init.py:init():799] starting run threads in backend
23
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_console_start():2335] atexit reg
24
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2190] redirect: wrap_raw
25
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2255] Wrapping output streams.
26
+ 2024-04-25 13:45:22,708 INFO MainThread:156194 [wandb_run.py:_redirect():2280] Redirects installed.
27
+ 2024-04-25 13:45:22,709 INFO MainThread:156194 [wandb_init.py:init():842] run started, returning control to user process
28
+ 2024-04-25 13:45:22,711 INFO MainThread:156194 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.40.1', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': 
True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 20000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Apr25_13-44-28_ip-26-0-167-177', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 
'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 7200, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'max_seq_length': 2048}
wandb/run-20240425_134518-etajcxpg/run-etajcxpg.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7106ec7a3c9fc6ae7e330c0412d38729748b95cc3dd091bc3a351fe0c1f7e1
3
+ size 2359339