bibekyess committed on Mar 10

Commit

b1a7e7e

•

1 Parent(s): 78261d2

Training in progress, step 2000

Browse files

Files changed (34) hide show

adapter_config.json +33 -0
adapter_model.safetensors +3 -0
preprocessed_data/augmented_train.csv +0 -0
preprocessed_data/augmented_train_2.csv +0 -0
runs/Mar09_16-47-28_f191c6c9daaa/events.out.tfevents.1710002942.f191c6c9daaa.148.0 +3 -0
runs/Mar09_16-55-33_f191c6c9daaa/events.out.tfevents.1710003341.f191c6c9daaa.12263.0 +3 -0
runs/Mar09_16-58-32_f191c6c9daaa/events.out.tfevents.1710003530.f191c6c9daaa.12263.1 +3 -0
runs/Mar09_17-04-24_f191c6c9daaa/events.out.tfevents.1710003871.f191c6c9daaa.14775.0 +3 -0
runs/Mar10_03-18-22_df99ceffa61d/events.out.tfevents.1710040720.df99ceffa61d.323.0 +3 -0
runs/Mar10_03-19-48_df99ceffa61d/events.out.tfevents.1710040896.df99ceffa61d.323.1 +3 -0
runs/Mar10_03-22-00_df99ceffa61d/events.out.tfevents.1710040949.df99ceffa61d.323.2 +3 -0
runs/Mar10_03-30-49_df99ceffa61d/events.out.tfevents.1710041490.df99ceffa61d.323.3 +3 -0
runs/Mar10_06-48-48_17daf5749447/events.out.tfevents.1710053395.17daf5749447.929.0 +3 -0
special_tokens_map.json +18 -0
tokenizer.json +0 -0
tokenizer_config.json +0 -0
training_args.bin +3 -0
wandb/debug-cli.root.log +0 -0
wandb/debug-internal.log +0 -0
wandb/debug.log +30 -0
wandb/run-20240310_031850-h5a6szhj/files/config.yaml +664 -0
wandb/run-20240310_031850-h5a6szhj/files/output.log +12 -0
wandb/run-20240310_031850-h5a6szhj/files/requirements.txt +500 -0
wandb/run-20240310_031850-h5a6szhj/files/wandb-metadata.json +52 -0
wandb/run-20240310_031850-h5a6szhj/logs/debug.log +77 -0
wandb/run-20240310_031850-h5a6szhj/run-h5a6szhj.wandb +0 -0
wandb/run-20240310_065024-lx2gw13k/files/config.yaml +680 -0
wandb/run-20240310_065024-lx2gw13k/files/output.log +6 -0
wandb/run-20240310_065024-lx2gw13k/files/requirements.txt +500 -0
wandb/run-20240310_065024-lx2gw13k/files/wandb-metadata.json +52 -0
wandb/run-20240310_065024-lx2gw13k/files/wandb-summary.json +1 -0
wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log +0 -0
wandb/run-20240310_065024-lx2gw13k/logs/debug.log +30 -0
wandb/run-20240310_065024-lx2gw13k/run-lx2gw13k.wandb +0 -0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Edentns/DataVortexS-10.7B-dpo-v1.11",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "k_proj",
+    "gate_proj",
+    "up_proj",
+    "v_proj",
+    "down_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb282a193e1395598ece0a26d494255e4a01998e06adeb1111c57000f4f8db1
+size 1006723888

preprocessed_data/augmented_train.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessed_data/augmented_train_2.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Mar09_16-47-28_f191c6c9daaa/events.out.tfevents.1710002942.f191c6c9daaa.148.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:937a2512af333a158ccb08c5723349dcfda08b039b922fb124780c3ff3565270
+size 88

runs/Mar09_16-55-33_f191c6c9daaa/events.out.tfevents.1710003341.f191c6c9daaa.12263.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d744b4b0b56fcdfe4bf90a2c4041a3a3135370d3d6187e4d0bc449962c93d0cc
+size 5213

runs/Mar09_16-58-32_f191c6c9daaa/events.out.tfevents.1710003530.f191c6c9daaa.12263.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb18d531e5c5d9ee4ddd498fba2e4c938b8d3a4d6402497a2d51cb3fc28ba8f
+size 88

runs/Mar09_17-04-24_f191c6c9daaa/events.out.tfevents.1710003871.f191c6c9daaa.14775.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb2a206c8f65fe8f03a1aff3b5a5ed8d02eae1076ba66251bdef0035accfe4f8
+size 88

runs/Mar10_03-18-22_df99ceffa61d/events.out.tfevents.1710040720.df99ceffa61d.323.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4db16b9999626263a3ea52181fa87a77b6eb6cffc71d189d819d2f9caaf8e68a
+size 5185

runs/Mar10_03-19-48_df99ceffa61d/events.out.tfevents.1710040896.df99ceffa61d.323.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e02e989db47e4e3a143b302d352f8627bbfe565c1adc808ec88519e333f58abb
+size 5184

runs/Mar10_03-22-00_df99ceffa61d/events.out.tfevents.1710040949.df99ceffa61d.323.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13c99ac5b361e88f30fe3b42328f96db3f72801164acd4eb377ee4b70c11cf91
+size 5185

runs/Mar10_03-30-49_df99ceffa61d/events.out.tfevents.1710041490.df99ceffa61d.323.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9124b73ff612dfa665f4815efd2166611e939ecc900d5e8b1a69be9fbb27976
+size 5185

runs/Mar10_06-48-48_17daf5749447/events.out.tfevents.1710053395.17daf5749447.929.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29822f4369a998315fcd35b76020ee51c77f9a0a59a98c3cfbffd0ce2a481d15
+size 5607

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": "###",
+  "pad_token": "###",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb1c6cda846927c63f8fb6d781f64a30d892d5d0815a91d93919c2820e6dbf56
+size 4984

wandb/debug-cli.root.log ADDED Viewed

File without changes

wandb/debug-internal.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/debug.log ADDED Viewed

	@@ -0,0 +1,30 @@

+2024-03-10 06:50:24,735 INFO    MainThread:929 [wandb_setup.py:_flush():76] Current SDK version is 0.16.4
+2024-03-10 06:50:24,736 INFO    MainThread:929 [wandb_setup.py:_flush():76] Configure stats pid to 929
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/settings
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+2024-03-10 06:50:24,738 INFO    MainThread:929 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+2024-03-10 06:50:24,738 INFO    MainThread:929 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
+2024-03-10 06:50:24,739 INFO    MainThread:929 [wandb_init.py:_log_setup():526] Logging user logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug.log
+2024-03-10 06:50:24,740 INFO    MainThread:929 [wandb_init.py:_log_setup():527] Logging internal logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log
+2024-03-10 06:50:24,740 INFO    MainThread:929 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7aa380ba0220>
+2024-03-10 06:50:24,741 INFO    MainThread:929 [wandb_init.py:init():566] calling init triggers
+2024-03-10 06:50:24,741 INFO    MainThread:929 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
+config: {}
+2024-03-10 06:50:24,741 INFO    MainThread:929 [wandb_init.py:init():616] starting backend
+2024-03-10 06:50:24,742 INFO    MainThread:929 [wandb_init.py:init():620] setting up manager
+2024-03-10 06:50:24,749 INFO    MainThread:929 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-03-10 06:50:24,751 INFO    MainThread:929 [wandb_init.py:init():628] backend started and connected
+2024-03-10 06:50:24,833 INFO    MainThread:929 [wandb_run.py:_label_probe_notebook():1295] probe notebook
+2024-03-10 06:50:27,302 INFO    MainThread:929 [wandb_init.py:init():720] updated telemetry
+2024-03-10 06:50:27,312 INFO    MainThread:929 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
+2024-03-10 06:50:27,741 INFO    MainThread:929 [wandb_run.py:_on_init():2262] communicating current version
+2024-03-10 06:50:27,844 INFO    MainThread:929 [wandb_run.py:_on_init():2271] got version response
+2024-03-10 06:50:27,844 INFO    MainThread:929 [wandb_init.py:init():804] starting run threads in backend
+2024-03-10 06:50:28,339 INFO    MainThread:929 [wandb_run.py:_console_start():2241] atexit reg
+2024-03-10 06:50:28,339 INFO    MainThread:929 [wandb_run.py:_redirect():2096] redirect: wrap_raw
+2024-03-10 06:50:28,340 INFO    MainThread:929 [wandb_run.py:_redirect():2161] Wrapping output streams.
+2024-03-10 06:50:28,340 INFO    MainThread:929 [wandb_run.py:_redirect():2186] Redirects installed.
+2024-03-10 06:50:28,343 INFO    MainThread:929 [wandb_init.py:init():847] run started, returning control to user process
+2024-03-10 06:50:28,351 INFO    MainThread:929 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 
'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_06-48-48_17daf5749447', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}

wandb/run-20240310_031850-h5a6szhj/files/config.yaml ADDED Viewed

	@@ -0,0 +1,664 @@

+wandb_version: 1
+_wandb:
+  desc: null
+  value:
+    python_version: 3.10.12
+    cli_version: 0.16.4
+    framework: huggingface
+    huggingface_version: 4.38.2
+    is_jupyter_run: true
+    is_kaggle_kernel: false
+    start_time: 1710040730.0
+    t:
+      1:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 84
+      - 98
+      2:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 84
+      - 98
+      3:
+      - 7
+      - 23
+      4: 3.10.12
+      5: 0.16.4
+      6: 4.38.2
+      8:
+      - 1
+      - 5
+      - 12
+      9:
+        1: transformers_trainer
+      13: linux-x86_64
+    m:
+    - 1: train/global_step
+      6:
+      - 3
+vocab_size:
+  desc: null
+  value: 48000
+max_position_embeddings:
+  desc: null
+  value: 4096
+hidden_size:
+  desc: null
+  value: 4096
+intermediate_size:
+  desc: null
+  value: 14336
+num_hidden_layers:
+  desc: null
+  value: 48
+num_attention_heads:
+  desc: null
+  value: 32
+num_key_value_heads:
+  desc: null
+  value: 8
+hidden_act:
+  desc: null
+  value: silu
+initializer_range:
+  desc: null
+  value: 0.02
+rms_norm_eps:
+  desc: null
+  value: 1.0e-05
+pretraining_tp:
+  desc: null
+  value: 1
+use_cache:
+  desc: null
+  value: true
+rope_theta:
+  desc: null
+  value: 10000.0
+rope_scaling:
+  desc: null
+  value: null
+attention_bias:
+  desc: null
+  value: false
+attention_dropout:
+  desc: null
+  value: 0.0
+return_dict:
+  desc: null
+  value: true
+output_hidden_states:
+  desc: null
+  value: false
+output_attentions:
+  desc: null
+  value: false
+torchscript:
+  desc: null
+  value: false
+torch_dtype:
+  desc: null
+  value: float16
+use_bfloat16:
+  desc: null
+  value: false
+tf_legacy_loss:
+  desc: null
+  value: false
+pruned_heads:
+  desc: null
+  value: {}
+tie_word_embeddings:
+  desc: null
+  value: false
+chunk_size_feed_forward:
+  desc: null
+  value: 0
+is_encoder_decoder:
+  desc: null
+  value: false
+is_decoder:
+  desc: null
+  value: false
+cross_attention_hidden_size:
+  desc: null
+  value: null
+add_cross_attention:
+  desc: null
+  value: false
+tie_encoder_decoder:
+  desc: null
+  value: false
+max_length:
+  desc: null
+  value: 20
+min_length:
+  desc: null
+  value: 0
+do_sample:
+  desc: null
+  value: false
+early_stopping:
+  desc: null
+  value: false
+num_beams:
+  desc: null
+  value: 1
+num_beam_groups:
+  desc: null
+  value: 1
+diversity_penalty:
+  desc: null
+  value: 0.0
+temperature:
+  desc: null
+  value: 1.0
+top_k:
+  desc: null
+  value: 50
+top_p:
+  desc: null
+  value: 1.0
+typical_p:
+  desc: null
+  value: 1.0
+repetition_penalty:
+  desc: null
+  value: 1.0
+length_penalty:
+  desc: null
+  value: 1.0
+no_repeat_ngram_size:
+  desc: null
+  value: 0
+encoder_no_repeat_ngram_size:
+  desc: null
+  value: 0
+bad_words_ids:
+  desc: null
+  value: null
+num_return_sequences:
+  desc: null
+  value: 1
+output_scores:
+  desc: null
+  value: false
+return_dict_in_generate:
+  desc: null
+  value: false
+forced_bos_token_id:
+  desc: null
+  value: null
+forced_eos_token_id:
+  desc: null
+  value: null
+remove_invalid_values:
+  desc: null
+  value: false
+exponential_decay_length_penalty:
+  desc: null
+  value: null
+suppress_tokens:
+  desc: null
+  value: null
+begin_suppress_tokens:
+  desc: null
+  value: null
+architectures:
+  desc: null
+  value:
+  - LlamaForCausalLM
+finetuning_task:
+  desc: null
+  value: null
+id2label:
+  desc: null
+  value:
+    '0': LABEL_0
+    '1': LABEL_1
+label2id:
+  desc: null
+  value:
+    LABEL_0: 0
+    LABEL_1: 1
+tokenizer_class:
+  desc: null
+  value: null
+prefix:
+  desc: null
+  value: null
+bos_token_id:
+  desc: null
+  value: 1
+pad_token_id:
+  desc: null
+  value: 2
+eos_token_id:
+  desc: null
+  value: 32000
+sep_token_id:
+  desc: null
+  value: null
+decoder_start_token_id:
+  desc: null
+  value: null
+task_specific_params:
+  desc: null
+  value: null
+problem_type:
+  desc: null
+  value: null
+_name_or_path:
+  desc: null
+  value: Edentns/DataVortexS-10.7B-dpo-v1.11
+transformers_version:
+  desc: null
+  value: 4.38.2
+model_type:
+  desc: null
+  value: llama
+quantization_config:
+  desc: null
+  value:
+    quant_method: QuantizationMethod.BITS_AND_BYTES
+    _load_in_8bit: false
+    _load_in_4bit: true
+    llm_int8_threshold: 6.0
+    llm_int8_skip_modules: null
+    llm_int8_enable_fp32_cpu_offload: false
+    llm_int8_has_fp16_weight: false
+    bnb_4bit_quant_type: nf4
+    bnb_4bit_use_double_quant: true
+    bnb_4bit_compute_dtype: float16
+    load_in_4bit: true
+    load_in_8bit: false
+output_dir:
+  desc: null
+  value: /content/drive/MyDrive/best-one
+overwrite_output_dir:
+  desc: null
+  value: false
+do_train:
+  desc: null
+  value: false
+do_eval:
+  desc: null
+  value: false
+do_predict:
+  desc: null
+  value: false
+evaluation_strategy:
+  desc: null
+  value: 'no'
+prediction_loss_only:
+  desc: null
+  value: false
+per_device_train_batch_size:
+  desc: null
+  value: 1
+per_device_eval_batch_size:
+  desc: null
+  value: 8
+per_gpu_train_batch_size:
+  desc: null
+  value: null
+per_gpu_eval_batch_size:
+  desc: null
+  value: null
+gradient_accumulation_steps:
+  desc: null
+  value: 1
+eval_accumulation_steps:
+  desc: null
+  value: null
+eval_delay:
+  desc: null
+  value: 0
+learning_rate:
+  desc: null
+  value: 0.0002
+weight_decay:
+  desc: null
+  value: 0.0
+adam_beta1:
+  desc: null
+  value: 0.9
+adam_beta2:
+  desc: null
+  value: 0.999
+adam_epsilon:
+  desc: null
+  value: 1.0e-08
+max_grad_norm:
+  desc: null
+  value: 0.3
+num_train_epochs:
+  desc: null
+  value: 4
+max_steps:
+  desc: null
+  value: -1
+lr_scheduler_type:
+  desc: null
+  value: constant
+lr_scheduler_kwargs:
+  desc: null
+  value: {}
+warmup_ratio:
+  desc: null
+  value: 0.03
+warmup_steps:
+  desc: null
+  value: 0
+log_level:
+  desc: null
+  value: passive
+log_level_replica:
+  desc: null
+  value: warning
+log_on_each_node:
+  desc: null
+  value: true
+logging_dir:
+  desc: null
+  value: /content/drive/MyDrive/best-one/runs/Mar10_03-30-49_df99ceffa61d
+logging_strategy:
+  desc: null
+  value: steps
+logging_first_step:
+  desc: null
+  value: false
+logging_steps:
+  desc: null
+  value: 500
+logging_nan_inf_filter:
+  desc: null
+  value: true
+save_strategy:
+  desc: null
+  value: steps
+save_steps:
+  desc: null
+  value: 10
+save_total_limit:
+  desc: null
+  value: null
+save_safetensors:
+  desc: null
+  value: true
+save_on_each_node:
+  desc: null
+  value: false
+save_only_model:
+  desc: null
+  value: false
+no_cuda:
+  desc: null
+  value: false
+use_cpu:
+  desc: null
+  value: false
+use_mps_device:
+  desc: null
+  value: false
+seed:
+  desc: null
+  value: 42
+data_seed:
+  desc: null
+  value: null
+jit_mode_eval:
+  desc: null
+  value: false
+use_ipex:
+  desc: null
+  value: false
+bf16:
+  desc: null
+  value: false
+fp16:
+  desc: null
+  value: true
+fp16_opt_level:
+  desc: null
+  value: O1
+half_precision_backend:
+  desc: null
+  value: auto
+bf16_full_eval:
+  desc: null
+  value: false
+fp16_full_eval:
+  desc: null
+  value: false
+tf32:
+  desc: null
+  value: null
+local_rank:
+  desc: null
+  value: 0
+ddp_backend:
+  desc: null
+  value: null
+tpu_num_cores:
+  desc: null
+  value: null
+tpu_metrics_debug:
+  desc: null
+  value: false
+debug:
+  desc: null
+  value: []
+dataloader_drop_last:
+  desc: null
+  value: false
+eval_steps:
+  desc: null
+  value: null
+dataloader_num_workers:
+  desc: null
+  value: 0
+dataloader_prefetch_factor:
+  desc: null
+  value: null
+past_index:
+  desc: null
+  value: -1
+run_name:
+  desc: null
+  value: /content/drive/MyDrive/best-one
+disable_tqdm:
+  desc: null
+  value: false
+remove_unused_columns:
+  desc: null
+  value: true
+label_names:
+  desc: null
+  value: null
+load_best_model_at_end:
+  desc: null
+  value: false
+metric_for_best_model:
+  desc: null
+  value: null
+greater_is_better:
+  desc: null
+  value: null
+ignore_data_skip:
+  desc: null
+  value: false
+fsdp:
+  desc: null
+  value: []
+fsdp_min_num_params:
+  desc: null
+  value: 0
+fsdp_config:
+  desc: null
+  value:
+    min_num_params: 0
+    xla: false
+    xla_fsdp_v2: false
+    xla_fsdp_grad_ckpt: false
+fsdp_transformer_layer_cls_to_wrap:
+  desc: null
+  value: null
+accelerator_config:
+  desc: null
+  value:
+    split_batches: false
+    dispatch_batches: null
+    even_batches: true
+    use_seedable_sampler: true
+deepspeed:
+  desc: null
+  value: null
+label_smoothing_factor:
+  desc: null
+  value: 0.0
+optim:
+  desc: null
+  value: adamw_torch_fused
+optim_args:
+  desc: null
+  value: null
+adafactor:
+  desc: null
+  value: false
+group_by_length:
+  desc: null
+  value: false
+length_column_name:
+  desc: null
+  value: length
+report_to:
+  desc: null
+  value:
+  - tensorboard
+  - wandb
+ddp_find_unused_parameters:
+  desc: null
+  value: null
+ddp_bucket_cap_mb:
+  desc: null
+  value: null
+ddp_broadcast_buffers:
+  desc: null
+  value: null
+dataloader_pin_memory:
+  desc: null
+  value: true
+dataloader_persistent_workers:
+  desc: null
+  value: false
+skip_memory_metrics:
+  desc: null
+  value: true
+use_legacy_prediction_loop:
+  desc: null
+  value: false
+push_to_hub:
+  desc: null
+  value: false
+resume_from_checkpoint:
+  desc: null
+  value: /content/drive/MyDrive/best-one/checkpoint-1000
+hub_model_id:
+  desc: null
+  value: null
+hub_strategy:
+  desc: null
+  value: every_save
+hub_token:
+  desc: null
+  value: <HUB_TOKEN>
+hub_private_repo:
+  desc: null
+  value: false
+hub_always_push:
+  desc: null
+  value: false
+gradient_checkpointing:
+  desc: null
+  value: true
+gradient_checkpointing_kwargs:
+  desc: null
+  value: null
+include_inputs_for_metrics:
+  desc: null
+  value: false
+fp16_backend:
+  desc: null
+  value: auto
+push_to_hub_model_id:
+  desc: null
+  value: null
+push_to_hub_organization:
+  desc: null
+  value: null
+push_to_hub_token:
+  desc: null
+  value: <PUSH_TO_HUB_TOKEN>
+mp_parameters:
+  desc: null
+  value: ''
+auto_find_batch_size:
+  desc: null
+  value: false
+full_determinism:
+  desc: null
+  value: false
+torchdynamo:
+  desc: null
+  value: null
+ray_scope:
+  desc: null
+  value: last
+ddp_timeout:
+  desc: null
+  value: 1800
+torch_compile:
+  desc: null
+  value: false
+torch_compile_backend:
+  desc: null
+  value: null
+torch_compile_mode:
+  desc: null
+  value: null
+dispatch_batches:
+  desc: null
+  value: null
+split_batches:
+  desc: null
+  value: null
+include_tokens_per_second:
+  desc: null
+  value: false
+include_num_input_tokens_seen:
+  desc: null
+  value: false
+neftune_noise_alpha:
+  desc: null
+  value: null

wandb/run-20240310_031850-h5a6szhj/files/output.log ADDED Viewed

	@@ -0,0 +1,12 @@

+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
+/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:434: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:434: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/trl/trainer/utils.py:434: UserWarning: The passed formatting_func has more than one argument. Usually that function should have a single argument `example` which corresponds to the dictionary returned by each element of the dataset. Make sure you know what you are doing.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.

wandb/run-20240310_031850-h5a6szhj/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,500 @@

+Babel==2.14.0
+CacheControl==0.14.0
+Cython==3.0.9
+Flask==2.2.5
+GDAL==3.6.4
+GitPython==3.1.42
+Jinja2==3.1.3
+Markdown==3.5.2
+MarkupSafe==2.1.5
+Pillow==9.4.0
+PyDrive2==1.6.3
+PyDrive==1.3.1
+PyGObject==3.42.1
+PyJWT==2.3.0
+PyOpenGL==3.1.7
+PySocks==1.7.1
+PyWavelets==1.5.0
+PyYAML==6.0.1
+Pygments==2.16.1
+SQLAlchemy==2.0.28
+SecretStorage==3.3.1
+Send2Trash==1.8.2
+Sphinx==5.0.2
+Werkzeug==3.0.1
+absl-py==1.4.0
+accelerate==0.27.2
+aiohttp==3.9.3
+aiosignal==1.3.1
+alabaster==0.7.16
+albumentations==1.3.1
+altair==4.2.2
+annotated-types==0.6.0
+anyio==3.7.1
+appdirs==1.4.4
+argon2-cffi-bindings==21.2.0
+argon2-cffi==23.1.0
+array-record==0.5.0
+arviz==0.15.1
+astropy==5.3.4
+astunparse==1.6.3
+async-timeout==4.0.3
+atpublic==4.0
+attrs==23.2.0
+audioread==3.0.1
+autograd==1.6.2
+backcall==0.2.0
+beautifulsoup4==4.12.3
+bidict==0.23.1
+bigframes==0.22.0
+bitsandbytes==0.43.0
+bleach==6.1.0
+blinker==1.4
+blis==0.7.11
+blosc2==2.0.0
+bokeh==3.3.4
+bqplot==0.12.43
+branca==0.7.1
+build==1.1.1
+cachetools==5.3.3
+catalogue==2.0.10
+certifi==2024.2.2
+cffi==1.16.0
+chardet==5.2.0
+charset-normalizer==3.3.2
+chex==0.1.85
+click-plugins==1.1.1
+click==8.1.7
+cligj==0.7.2
+cloudpathlib==0.16.0
+cloudpickle==2.2.1
+cmake==3.27.9
+cmdstanpy==1.2.1
+colorcet==3.1.0
+colorlover==0.3.0
+colour==0.1.5
+community==1.0.0b1
+confection==0.1.4
+cons==0.4.6
+contextlib2==21.6.0
+contourpy==1.2.0
+cryptography==42.0.5
+cufflinks==0.17.3
+cupy-cuda12x==12.2.0
+cvxopt==1.3.2
+cvxpy==1.3.3
+cycler==0.12.1
+cymem==2.0.8
+dask==2023.8.1
+datascience==0.17.6
+datasets==2.18.0
+db-dtypes==1.2.0
+dbus-python==1.2.18
+debugpy==1.6.6
+decorator==4.4.2
+defusedxml==0.7.1
+dill==0.3.8
+distributed==2023.8.1
+distro==1.7.0
+dlib==19.24.2
+dm-tree==0.1.8
+docker-pycreds==0.4.0
+docstring-parser==0.15
+docutils==0.18.1
+dopamine-rl==4.0.6
+duckdb==0.9.2
+earthengine-api==0.1.392
+easydict==1.13
+ecos==2.0.13
+editdistance==0.6.2
+eerepr==0.0.4
+en-core-web-sm==3.7.1
+entrypoints==0.4
+et-xmlfile==1.1.0
+etils==1.7.0
+etuples==0.3.9
+exceptiongroup==1.2.0
+fastai==2.7.14
+fastcore==1.5.29
+fastdownload==0.0.7
+fastjsonschema==2.19.1
+fastprogress==1.0.3
+fastrlock==0.8.2
+filelock==3.13.1
+fiona==1.9.5
+firebase-admin==5.3.0
+flatbuffers==23.5.26
+flax==0.8.1
+folium==0.14.0
+fonttools==4.49.0
+frozendict==2.4.0
+frozenlist==1.4.1
+fsspec==2023.6.0
+future==0.18.3
+gast==0.5.4
+gcsfs==2023.6.0
+gdown==4.7.3
+geemap==0.32.0
+gensim==4.3.2
+geocoder==1.38.1
+geographiclib==2.0
+geopandas==0.13.2
+geopy==2.3.0
+gin-config==0.5.0
+gitdb==4.0.11
+glob2==0.7
+google-ai-generativelanguage==0.4.0
+google-api-core==2.11.1
+google-api-python-client==2.84.0
+google-auth-httplib2==0.1.1
+google-auth-oauthlib==1.2.0
+google-auth==2.27.0
+google-cloud-aiplatform==1.43.0
+google-cloud-bigquery-connection==1.12.1
+google-cloud-bigquery-storage==2.24.0
+google-cloud-bigquery==3.12.0
+google-cloud-core==2.3.3
+google-cloud-datastore==2.15.2
+google-cloud-firestore==2.11.1
+google-cloud-functions==1.13.3
+google-cloud-iam==2.14.3
+google-cloud-language==2.13.3
+google-cloud-resource-manager==1.12.3
+google-cloud-storage==2.8.0
+google-cloud-translate==3.11.3
+google-colab==1.0.0
+google-crc32c==1.5.0
+google-generativeai==0.3.2
+google-pasta==0.2.0
+google-resumable-media==2.7.0
+google==2.0.3
+googleapis-common-protos==1.62.0
+googledrivedownloader==0.4
+graphviz==0.20.1
+greenlet==3.0.3
+grpc-google-iam-v1==0.13.0
+grpcio-status==1.48.2
+grpcio==1.62.0
+gspread-dataframe==3.3.1
+gspread==3.4.2
+gym-notices==0.0.8
+gym==0.25.2
+h5netcdf==1.3.0
+h5py==3.9.0
+holidays==0.44
+holoviews==1.17.1
+html5lib==1.1
+httpimport==1.3.1
+httplib2==0.22.0
+huggingface-hub==0.20.3
+humanize==4.7.0
+hyperopt==0.2.7
+ibis-framework==7.1.0
+idna==3.6
+imageio-ffmpeg==0.4.9
+imageio==2.31.6
+imagesize==1.4.1
+imbalanced-learn==0.10.1
+imgaug==0.4.0
+importlib-metadata==7.0.1
+importlib_resources==6.1.2
+imutils==0.5.4
+inflect==7.0.0
+iniconfig==2.0.0
+intel-openmp==2023.2.3
+ipyevents==2.0.2
+ipyfilechooser==0.6.0
+ipykernel==5.5.6
+ipyleaflet==0.18.2
+ipython-genutils==0.2.0
+ipython-sql==0.5.0
+ipython==7.34.0
+ipytree==0.2.2
+ipywidgets==7.7.1
+itsdangerous==2.1.2
+jax==0.4.23
+jaxlib==0.4.23+cuda12.cudnn89
+jeepney==0.7.1
+jieba==0.42.1
+joblib==1.3.2
+jsonpickle==3.0.3
+jsonschema-specifications==2023.12.1
+jsonschema==4.19.2
+jupyter-client==6.1.12
+jupyter-console==6.1.0
+jupyter-server==1.24.0
+jupyter_core==5.7.1
+jupyterlab_pygments==0.3.0
+jupyterlab_widgets==3.0.10
+kaggle==1.5.16
+kagglehub==0.2.0
+keras==2.15.0
+keyring==23.5.0
+kiwisolver==1.4.5
+langcodes==3.3.0
+launchpadlib==1.10.16
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
+lazy_loader==0.3
+libclang==16.0.6
+librosa==0.10.1
+lightgbm==4.1.0
+linkify-it-py==2.0.3
+llvmlite==0.41.1
+locket==1.0.0
+logical-unification==0.4.6
+lxml==4.9.4
+malloy==2023.1067
+markdown-it-py==3.0.0
+matplotlib-inline==0.1.6
+matplotlib-venn==0.11.10
+matplotlib==3.7.1
+mdit-py-plugins==0.4.0
+mdurl==0.1.2
+miniKanren==1.0.3
+missingno==0.5.2
+mistune==0.8.4
+mizani==0.9.3
+mkl==2023.2.0
+ml-dtypes==0.2.0
+mlxtend==0.22.0
+more-itertools==10.1.0
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multipledispatch==1.0.0
+multiprocess==0.70.16
+multitasking==0.0.11
+murmurhash==1.0.10
+music21==9.1.0
+natsort==8.4.0
+nbclassic==1.0.0
+nbclient==0.9.0
+nbconvert==6.5.4
+nbformat==5.9.2
+nest-asyncio==1.6.0
+networkx==3.2.1
+nibabel==4.0.2
+nltk==3.8.1
+notebook==6.5.5
+notebook_shim==0.2.4
+numba==0.58.1
+numexpr==2.9.0
+numpy==1.25.2
+oauth2client==4.1.3
+oauthlib==3.2.2
+opencv-contrib-python==4.8.0.76
+opencv-python-headless==4.9.0.80
+opencv-python==4.8.0.76
+openpyxl==3.1.2
+opt-einsum==3.3.0
+optax==0.1.9
+orbax-checkpoint==0.4.4
+osqp==0.6.2.post8
+packaging==23.2
+pandas-datareader==0.10.0
+pandas-gbq==0.19.2
+pandas-stubs==1.5.3.230304
+pandas==2.2.1
+pandocfilters==1.5.1
+panel==1.3.8
+param==2.0.2
+parso==0.8.3
+parsy==2.1
+partd==1.4.1
+pathlib==1.0.1
+patsy==0.5.6
+peewee==3.17.1
+peft==0.9.0
+pexpect==4.9.0
+pickleshare==0.7.5
+pins==0.8.4
+pip-tools==6.13.0
+pip==23.1.2
+platformdirs==4.2.0
+plotly==5.15.0
+plotnine==0.12.4
+pluggy==1.4.0
+polars==0.20.2
+pooch==1.8.1
+portpicker==1.5.2
+prefetch-generator==1.0.3
+preshed==3.0.9
+prettytable==3.10.0
+proglog==0.1.10
+progressbar2==4.2.0
+prometheus_client==0.20.0
+promise==2.3
+prompt-toolkit==3.0.43
+prophet==1.1.5
+proto-plus==1.23.0
+protobuf==3.20.3
+psutil==5.9.5
+psycopg2==2.9.9
+ptyprocess==0.7.0
+py-cpuinfo==9.0.0
+py4j==0.10.9.7
+pyOpenSSL==24.0.0
+pyarrow-hotfix==0.6
+pyarrow==14.0.2
+pyasn1-modules==0.3.0
+pyasn1==0.5.1
+pycocotools==2.0.7
+pycparser==2.21
+pydantic==2.6.3
+pydantic_core==2.16.3
+pydata-google-auth==1.8.2
+pydot-ng==2.0.0
+pydot==1.4.2
+pydotplus==2.0.2
+pyerfa==2.0.1.1
+pygame==2.5.2
+pymc==5.10.4
+pymystem3==0.2.0
+pyparsing==3.1.1
+pyperclip==1.8.2
+pyproj==3.6.1
+pyproject_hooks==1.0.0
+pyshp==2.3.1
+pytensor==2.18.6
+pytest==7.4.4
+python-apt==0.0.0
+python-box==7.1.1
+python-dateutil==2.8.2
+python-louvain==0.16
+python-slugify==8.0.4
+python-utils==3.8.2
+pytz==2023.4
+pyviz_comms==3.0.1
+pyzmq==23.2.1
+qdldl==0.1.7.post0
+qudida==0.0.4
+ratelim==0.1.6
+referencing==0.33.0
+regex==2023.12.25
+requests-oauthlib==1.3.1
+requests==2.31.0
+requirements-parser==0.5.0
+rich==13.7.1
+rpds-py==0.18.0
+rpy2==3.4.2
+rsa==4.9
+safetensors==0.4.2
+scikit-image==0.19.3
+scikit-learn==1.2.2
+scipy==1.11.4
+scooby==0.9.2
+scs==3.2.4.post1
+seaborn==0.13.1
+sentence-transformers==2.5.1
+sentencepiece==0.1.99
+sentry-sdk==1.41.0
+setproctitle==1.3.3
+setuptools==67.7.2
+shapely==2.0.3
+shtab==1.7.1
+six==1.16.0
+six==1.16.0
+sklearn-pandas==2.2.0
+smart-open==6.4.0
+smmap==5.0.1
+sniffio==1.3.1
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+spacy==3.7.4
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+sqlglot==19.9.0
+sqlparse==0.4.4
+srsly==2.4.8
+stanio==0.3.0
+statsmodels==0.14.1
+sympy==1.12
+tables==3.8.0
+tabulate==0.9.0
+tbb==2021.11.0
+tblib==3.0.0
+tenacity==8.2.3
+tensorboard-data-server==0.7.2
+tensorboard==2.15.2
+tensorflow-datasets==4.9.4
+tensorflow-estimator==2.15.0
+tensorflow-gcs-config==2.15.0
+tensorflow-hub==0.16.1
+tensorflow-io-gcs-filesystem==0.36.0
+tensorflow-metadata==1.14.0
+tensorflow-probability==0.23.0
+tensorflow==2.15.0
+tensorstore==0.1.45
+termcolor==2.4.0
+terminado==0.18.0
+text-unidecode==1.3
+textblob==0.17.1
+tf-keras==2.15.0
+tf-slim==1.1.0
+thinc==8.2.3
+threadpoolctl==3.3.0
+tifffile==2024.2.12
+tinycss2==1.2.1
+tokenizers==0.15.2
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.1
+torch==2.1.0+cu121
+torchaudio==2.1.0+cu121
+torchdata==0.7.0
+torchsummary==1.5.1
+torchtext==0.16.0
+torchvision==0.16.0+cu121
+tornado==6.3.3
+tqdm==4.66.2
+traitlets==5.7.1
+traittypes==0.2.1
+transformers==4.38.2
+triton==2.1.0
+trl==0.7.11
+tweepy==4.14.0
+typer==0.9.0
+types-pytz==2024.1.0.20240203
+types-setuptools==69.1.0.20240302
+typing_extensions==4.10.0
+tyro==0.7.3
+tzdata==2024.1
+tzlocal==5.2
+uc-micro-py==1.0.3
+uritemplate==4.1.1
+urllib3==2.0.7
+vega-datasets==0.9.0
+wadllib==1.3.6
+wandb==0.16.4
+wasabi==1.1.2
+wcwidth==0.2.13
+weasel==0.3.4
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+wheel==0.42.0
+widgetsnbextension==3.6.6
+wordcloud==1.9.3
+wrapt==1.14.1
+xarray-einstats==0.7.0
+xarray==2023.7.0
+xgboost==2.0.3
+xlrd==2.0.1
+xxhash==3.4.1
+xyzservices==2023.10.1
+yarl==1.9.4
+yellowbrick==1.5
+yfinance==0.2.37
+zict==3.0.0
+zipp==3.17.0

wandb/run-20240310_031850-h5a6szhj/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+    "os": "Linux-6.1.58+-x86_64-with-glibc2.35",
+    "python": "3.10.12",
+    "heartbeatAt": "2024-03-10T03:18:54.772132",
+    "startedAt": "2024-03-10T03:18:50.350319",
+    "docker": null,
+    "cuda": null,
+    "args": [],
+    "state": "running",
+    "program": "Copy%20of%20Solar-train-QLoRA.ipynb",
+    "codePathLocal": null,
+    "colab": "https://colab.research.google.com/notebook#fileId=1cntH6JMHtnqGybNA0Y55Jk1U_HRTWn3M",
+    "host": "df99ceffa61d",
+    "username": "root",
+    "executable": "/usr/bin/python3",
+    "cpu_count": 1,
+    "cpu_count_logical": 2,
+    "cpu_freq": {
+        "current": 2199.998,
+        "min": 0.0,
+        "max": 0.0
+    },
+    "cpu_freq_per_core": [
+        {
+            "current": 2199.998,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2199.998,
+            "min": 0.0,
+            "max": 0.0
+        }
+    ],
+    "disk": {
+        "/": {
+            "total": 78.1898422241211,
+            "used": 47.104637145996094
+        }
+    },
+    "gpu": "Tesla T4",
+    "gpu_count": 1,
+    "gpu_devices": [
+        {
+            "name": "Tesla T4",
+            "memory_total": 16106127360
+        }
+    ],
+    "memory": {
+        "total": 12.674781799316406
+    }
+}

wandb/run-20240310_031850-h5a6szhj/logs/debug.log ADDED Viewed

	@@ -0,0 +1,77 @@

+2024-03-10 03:18:50,408 INFO    MainThread:323 [wandb_setup.py:_flush():76] Current SDK version is 0.16.4
+2024-03-10 03:18:50,410 INFO    MainThread:323 [wandb_setup.py:_flush():76] Configure stats pid to 323
+2024-03-10 03:18:50,411 INFO    MainThread:323 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
+2024-03-10 03:18:50,411 INFO    MainThread:323 [wandb_setup.py:_flush():76] Loading settings from /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/settings
+2024-03-10 03:18:50,411 INFO    MainThread:323 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+2024-03-10 03:18:50,412 INFO    MainThread:323 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+2024-03-10 03:18:50,412 INFO    MainThread:323 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+2024-03-10 03:18:50,412 INFO    MainThread:323 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
+2024-03-10 03:18:50,413 INFO    MainThread:323 [wandb_init.py:_log_setup():526] Logging user logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_031850-h5a6szhj/logs/debug.log
+2024-03-10 03:18:50,414 INFO    MainThread:323 [wandb_init.py:_log_setup():527] Logging internal logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_031850-h5a6szhj/logs/debug-internal.log
+2024-03-10 03:18:50,414 INFO    MainThread:323 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x794c78f99780>
+2024-03-10 03:18:50,415 INFO    MainThread:323 [wandb_init.py:init():566] calling init triggers
+2024-03-10 03:18:50,415 INFO    MainThread:323 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
+config: {}
+2024-03-10 03:18:50,416 INFO    MainThread:323 [wandb_init.py:init():616] starting backend
+2024-03-10 03:18:50,416 INFO    MainThread:323 [wandb_init.py:init():620] setting up manager
+2024-03-10 03:18:50,424 INFO    MainThread:323 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-03-10 03:18:50,430 INFO    MainThread:323 [wandb_init.py:init():628] backend started and connected
+2024-03-10 03:18:50,467 INFO    MainThread:323 [wandb_run.py:_label_probe_notebook():1295] probe notebook
+2024-03-10 03:18:53,361 INFO    MainThread:323 [wandb_init.py:init():720] updated telemetry
+2024-03-10 03:18:53,375 INFO    MainThread:323 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
+2024-03-10 03:18:54,216 INFO    MainThread:323 [wandb_run.py:_on_init():2262] communicating current version
+2024-03-10 03:18:54,625 INFO    MainThread:323 [wandb_run.py:_on_init():2271] got version response
+2024-03-10 03:18:54,625 INFO    MainThread:323 [wandb_init.py:init():804] starting run threads in backend
+2024-03-10 03:18:56,342 INFO    MainThread:323 [wandb_run.py:_console_start():2241] atexit reg
+2024-03-10 03:18:56,342 INFO    MainThread:323 [wandb_run.py:_redirect():2096] redirect: wrap_raw
+2024-03-10 03:18:56,342 INFO    MainThread:323 [wandb_run.py:_redirect():2161] Wrapping output streams.
+2024-03-10 03:18:56,343 INFO    MainThread:323 [wandb_run.py:_redirect():2186] Redirects installed.
+2024-03-10 03:18:56,345 INFO    MainThread:323 [wandb_init.py:init():847] run started, returning control to user process
+2024-03-10 03:18:56,358 INFO    MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 
'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-18-22_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 50, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-10 03:19:29,364 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:19:29,364 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:19:48,135 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:19:48,145 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:19:48,145 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:20:26,801 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:20:28,702 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:20:28,702 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:20:30,823 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:21:36,022 INFO    MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 
'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-19-48_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 50, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-500', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-10 03:21:51,269 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:21:51,269 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:22:00,243 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:22:00,256 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:22:00,256 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:22:03,314 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:22:03,457 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:22:03,457 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:22:12,817 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:22:29,320 INFO    MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 
'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-22-00_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 50, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-10 03:30:10,462 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:30:10,462 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:30:16,433 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:30:16,515 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:30:16,515 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:30:49,356 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:30:49,411 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:30:49,411 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:30:59,671 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:30:59,809 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:30:59,810 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:31:01,547 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:31:01,596 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:31:01,598 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:31:18,837 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:31:18,886 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:31:18,886 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:31:26,809 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:31:30,144 INFO    MainThread:323 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 
'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_03-30-49_df99ceffa61d', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-10 03:36:13,219 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:36:13,220 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:36:16,498 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:36:25,544 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:36:25,545 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-10 03:37:27,886 INFO    MainThread:323 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-10 03:37:34,416 INFO    MainThread:323 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-10 03:37:34,416 INFO    MainThread:323 [wandb_init.py:_pause_backend():437] pausing backend

wandb/run-20240310_031850-h5a6szhj/run-h5a6szhj.wandb ADDED Viewed

Binary file (32.9 kB). View file

wandb/run-20240310_065024-lx2gw13k/files/config.yaml ADDED Viewed

	@@ -0,0 +1,680 @@

+wandb_version: 1
+_wandb:
+  desc: null
+  value:
+    python_version: 3.10.12
+    cli_version: 0.16.4
+    framework: huggingface
+    huggingface_version: 4.38.2
+    is_jupyter_run: true
+    is_kaggle_kernel: false
+    start_time: 1710053424.0
+    t:
+      1:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 84
+      - 98
+      2:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 84
+      - 98
+      3:
+      - 7
+      - 23
+      4: 3.10.12
+      5: 0.16.4
+      6: 4.38.2
+      8:
+      - 1
+      - 5
+      - 12
+      9:
+        1: transformers_trainer
+      13: linux-x86_64
+    m:
+    - 1: train/global_step
+      6:
+      - 3
+    - 1: train/loss
+      5: 1
+      6:
+      - 1
+    - 1: train/grad_norm
+      5: 1
+      6:
+      - 1
+    - 1: train/learning_rate
+      5: 1
+      6:
+      - 1
+    - 1: train/epoch
+      5: 1
+      6:
+      - 1
+vocab_size:
+  desc: null
+  value: 48000
+max_position_embeddings:
+  desc: null
+  value: 4096
+hidden_size:
+  desc: null
+  value: 4096
+intermediate_size:
+  desc: null
+  value: 14336
+num_hidden_layers:
+  desc: null
+  value: 48
+num_attention_heads:
+  desc: null
+  value: 32
+num_key_value_heads:
+  desc: null
+  value: 8
+hidden_act:
+  desc: null
+  value: silu
+initializer_range:
+  desc: null
+  value: 0.02
+rms_norm_eps:
+  desc: null
+  value: 1.0e-05
+pretraining_tp:
+  desc: null
+  value: 1
+use_cache:
+  desc: null
+  value: true
+rope_theta:
+  desc: null
+  value: 10000.0
+rope_scaling:
+  desc: null
+  value: null
+attention_bias:
+  desc: null
+  value: false
+attention_dropout:
+  desc: null
+  value: 0.0
+return_dict:
+  desc: null
+  value: true
+output_hidden_states:
+  desc: null
+  value: false
+output_attentions:
+  desc: null
+  value: false
+torchscript:
+  desc: null
+  value: false
+torch_dtype:
+  desc: null
+  value: float16
+use_bfloat16:
+  desc: null
+  value: false
+tf_legacy_loss:
+  desc: null
+  value: false
+pruned_heads:
+  desc: null
+  value: {}
+tie_word_embeddings:
+  desc: null
+  value: false
+chunk_size_feed_forward:
+  desc: null
+  value: 0
+is_encoder_decoder:
+  desc: null
+  value: false
+is_decoder:
+  desc: null
+  value: false
+cross_attention_hidden_size:
+  desc: null
+  value: null
+add_cross_attention:
+  desc: null
+  value: false
+tie_encoder_decoder:
+  desc: null
+  value: false
+max_length:
+  desc: null
+  value: 20
+min_length:
+  desc: null
+  value: 0
+do_sample:
+  desc: null
+  value: false
+early_stopping:
+  desc: null
+  value: false
+num_beams:
+  desc: null
+  value: 1
+num_beam_groups:
+  desc: null
+  value: 1
+diversity_penalty:
+  desc: null
+  value: 0.0
+temperature:
+  desc: null
+  value: 1.0
+top_k:
+  desc: null
+  value: 50
+top_p:
+  desc: null
+  value: 1.0
+typical_p:
+  desc: null
+  value: 1.0
+repetition_penalty:
+  desc: null
+  value: 1.0
+length_penalty:
+  desc: null
+  value: 1.0
+no_repeat_ngram_size:
+  desc: null
+  value: 0
+encoder_no_repeat_ngram_size:
+  desc: null
+  value: 0
+bad_words_ids:
+  desc: null
+  value: null
+num_return_sequences:
+  desc: null
+  value: 1
+output_scores:
+  desc: null
+  value: false
+return_dict_in_generate:
+  desc: null
+  value: false
+forced_bos_token_id:
+  desc: null
+  value: null
+forced_eos_token_id:
+  desc: null
+  value: null
+remove_invalid_values:
+  desc: null
+  value: false
+exponential_decay_length_penalty:
+  desc: null
+  value: null
+suppress_tokens:
+  desc: null
+  value: null
+begin_suppress_tokens:
+  desc: null
+  value: null
+architectures:
+  desc: null
+  value:
+  - LlamaForCausalLM
+finetuning_task:
+  desc: null
+  value: null
+id2label:
+  desc: null
+  value:
+    '0': LABEL_0
+    '1': LABEL_1
+label2id:
+  desc: null
+  value:
+    LABEL_0: 0
+    LABEL_1: 1
+tokenizer_class:
+  desc: null
+  value: null
+prefix:
+  desc: null
+  value: null
+bos_token_id:
+  desc: null
+  value: 1
+pad_token_id:
+  desc: null
+  value: 2
+eos_token_id:
+  desc: null
+  value: 32000
+sep_token_id:
+  desc: null
+  value: null
+decoder_start_token_id:
+  desc: null
+  value: null
+task_specific_params:
+  desc: null
+  value: null
+problem_type:
+  desc: null
+  value: null
+_name_or_path:
+  desc: null
+  value: Edentns/DataVortexS-10.7B-dpo-v1.11
+transformers_version:
+  desc: null
+  value: 4.38.2
+model_type:
+  desc: null
+  value: llama
+quantization_config:
+  desc: null
+  value:
+    quant_method: QuantizationMethod.BITS_AND_BYTES
+    _load_in_8bit: false
+    _load_in_4bit: true
+    llm_int8_threshold: 6.0
+    llm_int8_skip_modules: null
+    llm_int8_enable_fp32_cpu_offload: false
+    llm_int8_has_fp16_weight: false
+    bnb_4bit_quant_type: nf4
+    bnb_4bit_use_double_quant: true
+    bnb_4bit_compute_dtype: float16
+    load_in_4bit: true
+    load_in_8bit: false
+output_dir:
+  desc: null
+  value: /content/drive/MyDrive/best-one
+overwrite_output_dir:
+  desc: null
+  value: false
+do_train:
+  desc: null
+  value: false
+do_eval:
+  desc: null
+  value: false
+do_predict:
+  desc: null
+  value: false
+evaluation_strategy:
+  desc: null
+  value: 'no'
+prediction_loss_only:
+  desc: null
+  value: false
+per_device_train_batch_size:
+  desc: null
+  value: 1
+per_device_eval_batch_size:
+  desc: null
+  value: 8
+per_gpu_train_batch_size:
+  desc: null
+  value: null
+per_gpu_eval_batch_size:
+  desc: null
+  value: null
+gradient_accumulation_steps:
+  desc: null
+  value: 1
+eval_accumulation_steps:
+  desc: null
+  value: null
+eval_delay:
+  desc: null
+  value: 0
+learning_rate:
+  desc: null
+  value: 0.0002
+weight_decay:
+  desc: null
+  value: 0.0
+adam_beta1:
+  desc: null
+  value: 0.9
+adam_beta2:
+  desc: null
+  value: 0.999
+adam_epsilon:
+  desc: null
+  value: 1.0e-08
+max_grad_norm:
+  desc: null
+  value: 0.3
+num_train_epochs:
+  desc: null
+  value: 4
+max_steps:
+  desc: null
+  value: -1
+lr_scheduler_type:
+  desc: null
+  value: constant
+lr_scheduler_kwargs:
+  desc: null
+  value: {}
+warmup_ratio:
+  desc: null
+  value: 0.03
+warmup_steps:
+  desc: null
+  value: 0
+log_level:
+  desc: null
+  value: passive
+log_level_replica:
+  desc: null
+  value: warning
+log_on_each_node:
+  desc: null
+  value: true
+logging_dir:
+  desc: null
+  value: /content/drive/MyDrive/best-one/runs/Mar10_06-48-48_17daf5749447
+logging_strategy:
+  desc: null
+  value: steps
+logging_first_step:
+  desc: null
+  value: false
+logging_steps:
+  desc: null
+  value: 500
+logging_nan_inf_filter:
+  desc: null
+  value: true
+save_strategy:
+  desc: null
+  value: steps
+save_steps:
+  desc: null
+  value: 100
+save_total_limit:
+  desc: null
+  value: null
+save_safetensors:
+  desc: null
+  value: true
+save_on_each_node:
+  desc: null
+  value: false
+save_only_model:
+  desc: null
+  value: false
+no_cuda:
+  desc: null
+  value: false
+use_cpu:
+  desc: null
+  value: false
+use_mps_device:
+  desc: null
+  value: false
+seed:
+  desc: null
+  value: 42
+data_seed:
+  desc: null
+  value: null
+jit_mode_eval:
+  desc: null
+  value: false
+use_ipex:
+  desc: null
+  value: false
+bf16:
+  desc: null
+  value: false
+fp16:
+  desc: null
+  value: true
+fp16_opt_level:
+  desc: null
+  value: O1
+half_precision_backend:
+  desc: null
+  value: auto
+bf16_full_eval:
+  desc: null
+  value: false
+fp16_full_eval:
+  desc: null
+  value: false
+tf32:
+  desc: null
+  value: null
+local_rank:
+  desc: null
+  value: 0
+ddp_backend:
+  desc: null
+  value: null
+tpu_num_cores:
+  desc: null
+  value: null
+tpu_metrics_debug:
+  desc: null
+  value: false
+debug:
+  desc: null
+  value: []
+dataloader_drop_last:
+  desc: null
+  value: false
+eval_steps:
+  desc: null
+  value: null
+dataloader_num_workers:
+  desc: null
+  value: 0
+dataloader_prefetch_factor:
+  desc: null
+  value: null
+past_index:
+  desc: null
+  value: -1
+run_name:
+  desc: null
+  value: /content/drive/MyDrive/best-one
+disable_tqdm:
+  desc: null
+  value: false
+remove_unused_columns:
+  desc: null
+  value: true
+label_names:
+  desc: null
+  value: null
+load_best_model_at_end:
+  desc: null
+  value: false
+metric_for_best_model:
+  desc: null
+  value: null
+greater_is_better:
+  desc: null
+  value: null
+ignore_data_skip:
+  desc: null
+  value: false
+fsdp:
+  desc: null
+  value: []
+fsdp_min_num_params:
+  desc: null
+  value: 0
+fsdp_config:
+  desc: null
+  value:
+    min_num_params: 0
+    xla: false
+    xla_fsdp_v2: false
+    xla_fsdp_grad_ckpt: false
+fsdp_transformer_layer_cls_to_wrap:
+  desc: null
+  value: null
+accelerator_config:
+  desc: null
+  value:
+    split_batches: false
+    dispatch_batches: null
+    even_batches: true
+    use_seedable_sampler: true
+deepspeed:
+  desc: null
+  value: null
+label_smoothing_factor:
+  desc: null
+  value: 0.0
+optim:
+  desc: null
+  value: adamw_torch_fused
+optim_args:
+  desc: null
+  value: null
+adafactor:
+  desc: null
+  value: false
+group_by_length:
+  desc: null
+  value: false
+length_column_name:
+  desc: null
+  value: length
+report_to:
+  desc: null
+  value:
+  - tensorboard
+  - wandb
+ddp_find_unused_parameters:
+  desc: null
+  value: null
+ddp_bucket_cap_mb:
+  desc: null
+  value: null
+ddp_broadcast_buffers:
+  desc: null
+  value: null
+dataloader_pin_memory:
+  desc: null
+  value: true
+dataloader_persistent_workers:
+  desc: null
+  value: false
+skip_memory_metrics:
+  desc: null
+  value: true
+use_legacy_prediction_loop:
+  desc: null
+  value: false
+push_to_hub:
+  desc: null
+  value: true
+resume_from_checkpoint:
+  desc: null
+  value: /content/drive/MyDrive/best-one/checkpoint-1000
+hub_model_id:
+  desc: null
+  value: null
+hub_strategy:
+  desc: null
+  value: every_save
+hub_token:
+  desc: null
+  value: <HUB_TOKEN>
+hub_private_repo:
+  desc: null
+  value: false
+hub_always_push:
+  desc: null
+  value: false
+gradient_checkpointing:
+  desc: null
+  value: true
+gradient_checkpointing_kwargs:
+  desc: null
+  value: null
+include_inputs_for_metrics:
+  desc: null
+  value: false
+fp16_backend:
+  desc: null
+  value: auto
+push_to_hub_model_id:
+  desc: null
+  value: null
+push_to_hub_organization:
+  desc: null
+  value: null
+push_to_hub_token:
+  desc: null
+  value: <PUSH_TO_HUB_TOKEN>
+mp_parameters:
+  desc: null
+  value: ''
+auto_find_batch_size:
+  desc: null
+  value: false
+full_determinism:
+  desc: null
+  value: false
+torchdynamo:
+  desc: null
+  value: null
+ray_scope:
+  desc: null
+  value: last
+ddp_timeout:
+  desc: null
+  value: 1800
+torch_compile:
+  desc: null
+  value: false
+torch_compile_backend:
+  desc: null
+  value: null
+torch_compile_mode:
+  desc: null
+  value: null
+dispatch_batches:
+  desc: null
+  value: null
+split_batches:
+  desc: null
+  value: null
+include_tokens_per_second:
+  desc: null
+  value: false
+include_num_input_tokens_seen:
+  desc: null
+  value: false
+neftune_noise_alpha:
+  desc: null
+  value: null

wandb/run-20240310_065024-lx2gw13k/files/output.log ADDED Viewed

	@@ -0,0 +1,6 @@

+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
+/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+  warnings.warn(
+/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.

wandb/run-20240310_065024-lx2gw13k/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,500 @@

+Babel==2.14.0
+CacheControl==0.14.0
+Cython==3.0.9
+Flask==2.2.5
+GDAL==3.6.4
+GitPython==3.1.42
+Jinja2==3.1.3
+Markdown==3.5.2
+MarkupSafe==2.1.5
+Pillow==9.4.0
+PyDrive2==1.6.3
+PyDrive==1.3.1
+PyGObject==3.42.1
+PyJWT==2.3.0
+PyOpenGL==3.1.7
+PySocks==1.7.1
+PyWavelets==1.5.0
+PyYAML==6.0.1
+Pygments==2.16.1
+SQLAlchemy==2.0.28
+SecretStorage==3.3.1
+Send2Trash==1.8.2
+Sphinx==5.0.2
+Werkzeug==3.0.1
+absl-py==1.4.0
+accelerate==0.27.2
+aiohttp==3.9.3
+aiosignal==1.3.1
+alabaster==0.7.16
+albumentations==1.3.1
+altair==4.2.2
+annotated-types==0.6.0
+anyio==3.7.1
+appdirs==1.4.4
+argon2-cffi-bindings==21.2.0
+argon2-cffi==23.1.0
+array-record==0.5.0
+arviz==0.15.1
+astropy==5.3.4
+astunparse==1.6.3
+async-timeout==4.0.3
+atpublic==4.0
+attrs==23.2.0
+audioread==3.0.1
+autograd==1.6.2
+backcall==0.2.0
+beautifulsoup4==4.12.3
+bidict==0.23.1
+bigframes==0.22.0
+bitsandbytes==0.43.0
+bleach==6.1.0
+blinker==1.4
+blis==0.7.11
+blosc2==2.0.0
+bokeh==3.3.4
+bqplot==0.12.43
+branca==0.7.1
+build==1.1.1
+cachetools==5.3.3
+catalogue==2.0.10
+certifi==2024.2.2
+cffi==1.16.0
+chardet==5.2.0
+charset-normalizer==3.3.2
+chex==0.1.85
+click-plugins==1.1.1
+click==8.1.7
+cligj==0.7.2
+cloudpathlib==0.16.0
+cloudpickle==2.2.1
+cmake==3.27.9
+cmdstanpy==1.2.1
+colorcet==3.1.0
+colorlover==0.3.0
+colour==0.1.5
+community==1.0.0b1
+confection==0.1.4
+cons==0.4.6
+contextlib2==21.6.0
+contourpy==1.2.0
+cryptography==42.0.5
+cufflinks==0.17.3
+cupy-cuda12x==12.2.0
+cvxopt==1.3.2
+cvxpy==1.3.3
+cycler==0.12.1
+cymem==2.0.8
+dask==2023.8.1
+datascience==0.17.6
+datasets==2.18.0
+db-dtypes==1.2.0
+dbus-python==1.2.18
+debugpy==1.6.6
+decorator==4.4.2
+defusedxml==0.7.1
+dill==0.3.8
+distributed==2023.8.1
+distro==1.7.0
+dlib==19.24.2
+dm-tree==0.1.8
+docker-pycreds==0.4.0
+docstring-parser==0.15
+docutils==0.18.1
+dopamine-rl==4.0.6
+duckdb==0.9.2
+earthengine-api==0.1.392
+easydict==1.13
+ecos==2.0.13
+editdistance==0.6.2
+eerepr==0.0.4
+en-core-web-sm==3.7.1
+entrypoints==0.4
+et-xmlfile==1.1.0
+etils==1.7.0
+etuples==0.3.9
+exceptiongroup==1.2.0
+fastai==2.7.14
+fastcore==1.5.29
+fastdownload==0.0.7
+fastjsonschema==2.19.1
+fastprogress==1.0.3
+fastrlock==0.8.2
+filelock==3.13.1
+fiona==1.9.5
+firebase-admin==5.3.0
+flatbuffers==23.5.26
+flax==0.8.1
+folium==0.14.0
+fonttools==4.49.0
+frozendict==2.4.0
+frozenlist==1.4.1
+fsspec==2023.6.0
+future==0.18.3
+gast==0.5.4
+gcsfs==2023.6.0
+gdown==4.7.3
+geemap==0.32.0
+gensim==4.3.2
+geocoder==1.38.1
+geographiclib==2.0
+geopandas==0.13.2
+geopy==2.3.0
+gin-config==0.5.0
+gitdb==4.0.11
+glob2==0.7
+google-ai-generativelanguage==0.4.0
+google-api-core==2.11.1
+google-api-python-client==2.84.0
+google-auth-httplib2==0.1.1
+google-auth-oauthlib==1.2.0
+google-auth==2.27.0
+google-cloud-aiplatform==1.43.0
+google-cloud-bigquery-connection==1.12.1
+google-cloud-bigquery-storage==2.24.0
+google-cloud-bigquery==3.12.0
+google-cloud-core==2.3.3
+google-cloud-datastore==2.15.2
+google-cloud-firestore==2.11.1
+google-cloud-functions==1.13.3
+google-cloud-iam==2.14.3
+google-cloud-language==2.13.3
+google-cloud-resource-manager==1.12.3
+google-cloud-storage==2.8.0
+google-cloud-translate==3.11.3
+google-colab==1.0.0
+google-crc32c==1.5.0
+google-generativeai==0.3.2
+google-pasta==0.2.0
+google-resumable-media==2.7.0
+google==2.0.3
+googleapis-common-protos==1.62.0
+googledrivedownloader==0.4
+graphviz==0.20.1
+greenlet==3.0.3
+grpc-google-iam-v1==0.13.0
+grpcio-status==1.48.2
+grpcio==1.62.0
+gspread-dataframe==3.3.1
+gspread==3.4.2
+gym-notices==0.0.8
+gym==0.25.2
+h5netcdf==1.3.0
+h5py==3.9.0
+holidays==0.44
+holoviews==1.17.1
+html5lib==1.1
+httpimport==1.3.1
+httplib2==0.22.0
+huggingface-hub==0.20.3
+humanize==4.7.0
+hyperopt==0.2.7
+ibis-framework==7.1.0
+idna==3.6
+imageio-ffmpeg==0.4.9
+imageio==2.31.6
+imagesize==1.4.1
+imbalanced-learn==0.10.1
+imgaug==0.4.0
+importlib-metadata==7.0.1
+importlib_resources==6.1.2
+imutils==0.5.4
+inflect==7.0.0
+iniconfig==2.0.0
+intel-openmp==2023.2.3
+ipyevents==2.0.2
+ipyfilechooser==0.6.0
+ipykernel==5.5.6
+ipyleaflet==0.18.2
+ipython-genutils==0.2.0
+ipython-sql==0.5.0
+ipython==7.34.0
+ipytree==0.2.2
+ipywidgets==7.7.1
+itsdangerous==2.1.2
+jax==0.4.23
+jaxlib==0.4.23+cuda12.cudnn89
+jeepney==0.7.1
+jieba==0.42.1
+joblib==1.3.2
+jsonpickle==3.0.3
+jsonschema-specifications==2023.12.1
+jsonschema==4.19.2
+jupyter-client==6.1.12
+jupyter-console==6.1.0
+jupyter-server==1.24.0
+jupyter_core==5.7.1
+jupyterlab_pygments==0.3.0
+jupyterlab_widgets==3.0.10
+kaggle==1.5.16
+kagglehub==0.2.0
+keras==2.15.0
+keyring==23.5.0
+kiwisolver==1.4.5
+langcodes==3.3.0
+launchpadlib==1.10.16
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
+lazy_loader==0.3
+libclang==16.0.6
+librosa==0.10.1
+lightgbm==4.1.0
+linkify-it-py==2.0.3
+llvmlite==0.41.1
+locket==1.0.0
+logical-unification==0.4.6
+lxml==4.9.4
+malloy==2023.1067
+markdown-it-py==3.0.0
+matplotlib-inline==0.1.6
+matplotlib-venn==0.11.10
+matplotlib==3.7.1
+mdit-py-plugins==0.4.0
+mdurl==0.1.2
+miniKanren==1.0.3
+missingno==0.5.2
+mistune==0.8.4
+mizani==0.9.3
+mkl==2023.2.0
+ml-dtypes==0.2.0
+mlxtend==0.22.0
+more-itertools==10.1.0
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multipledispatch==1.0.0
+multiprocess==0.70.16
+multitasking==0.0.11
+murmurhash==1.0.10
+music21==9.1.0
+natsort==8.4.0
+nbclassic==1.0.0
+nbclient==0.9.0
+nbconvert==6.5.4
+nbformat==5.9.2
+nest-asyncio==1.6.0
+networkx==3.2.1
+nibabel==4.0.2
+nltk==3.8.1
+notebook==6.5.5
+notebook_shim==0.2.4
+numba==0.58.1
+numexpr==2.9.0
+numpy==1.25.2
+oauth2client==4.1.3
+oauthlib==3.2.2
+opencv-contrib-python==4.8.0.76
+opencv-python-headless==4.9.0.80
+opencv-python==4.8.0.76
+openpyxl==3.1.2
+opt-einsum==3.3.0
+optax==0.1.9
+orbax-checkpoint==0.4.4
+osqp==0.6.2.post8
+packaging==23.2
+pandas-datareader==0.10.0
+pandas-gbq==0.19.2
+pandas-stubs==1.5.3.230304
+pandas==2.2.1
+pandocfilters==1.5.1
+panel==1.3.8
+param==2.0.2
+parso==0.8.3
+parsy==2.1
+partd==1.4.1
+pathlib==1.0.1
+patsy==0.5.6
+peewee==3.17.1
+peft==0.9.0
+pexpect==4.9.0
+pickleshare==0.7.5
+pins==0.8.4
+pip-tools==6.13.0
+pip==23.1.2
+platformdirs==4.2.0
+plotly==5.15.0
+plotnine==0.12.4
+pluggy==1.4.0
+polars==0.20.2
+pooch==1.8.1
+portpicker==1.5.2
+prefetch-generator==1.0.3
+preshed==3.0.9
+prettytable==3.10.0
+proglog==0.1.10
+progressbar2==4.2.0
+prometheus_client==0.20.0
+promise==2.3
+prompt-toolkit==3.0.43
+prophet==1.1.5
+proto-plus==1.23.0
+protobuf==3.20.3
+psutil==5.9.5
+psycopg2==2.9.9
+ptyprocess==0.7.0
+py-cpuinfo==9.0.0
+py4j==0.10.9.7
+pyOpenSSL==24.0.0
+pyarrow-hotfix==0.6
+pyarrow==14.0.2
+pyasn1-modules==0.3.0
+pyasn1==0.5.1
+pycocotools==2.0.7
+pycparser==2.21
+pydantic==2.6.3
+pydantic_core==2.16.3
+pydata-google-auth==1.8.2
+pydot-ng==2.0.0
+pydot==1.4.2
+pydotplus==2.0.2
+pyerfa==2.0.1.1
+pygame==2.5.2
+pymc==5.10.4
+pymystem3==0.2.0
+pyparsing==3.1.1
+pyperclip==1.8.2
+pyproj==3.6.1
+pyproject_hooks==1.0.0
+pyshp==2.3.1
+pytensor==2.18.6
+pytest==7.4.4
+python-apt==0.0.0
+python-box==7.1.1
+python-dateutil==2.8.2
+python-louvain==0.16
+python-slugify==8.0.4
+python-utils==3.8.2
+pytz==2023.4
+pyviz_comms==3.0.1
+pyzmq==23.2.1
+qdldl==0.1.7.post0
+qudida==0.0.4
+ratelim==0.1.6
+referencing==0.33.0
+regex==2023.12.25
+requests-oauthlib==1.3.1
+requests==2.31.0
+requirements-parser==0.5.0
+rich==13.7.1
+rpds-py==0.18.0
+rpy2==3.4.2
+rsa==4.9
+safetensors==0.4.2
+scikit-image==0.19.3
+scikit-learn==1.2.2
+scipy==1.11.4
+scooby==0.9.2
+scs==3.2.4.post1
+seaborn==0.13.1
+sentence-transformers==2.5.1
+sentencepiece==0.1.99
+sentry-sdk==1.41.0
+setproctitle==1.3.3
+setuptools==67.7.2
+shapely==2.0.3
+shtab==1.7.1
+six==1.16.0
+six==1.16.0
+sklearn-pandas==2.2.0
+smart-open==6.4.0
+smmap==5.0.1
+sniffio==1.3.1
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+spacy==3.7.4
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+sqlglot==19.9.0
+sqlparse==0.4.4
+srsly==2.4.8
+stanio==0.3.0
+statsmodels==0.14.1
+sympy==1.12
+tables==3.8.0
+tabulate==0.9.0
+tbb==2021.11.0
+tblib==3.0.0
+tenacity==8.2.3
+tensorboard-data-server==0.7.2
+tensorboard==2.15.2
+tensorflow-datasets==4.9.4
+tensorflow-estimator==2.15.0
+tensorflow-gcs-config==2.15.0
+tensorflow-hub==0.16.1
+tensorflow-io-gcs-filesystem==0.36.0
+tensorflow-metadata==1.14.0
+tensorflow-probability==0.23.0
+tensorflow==2.15.0
+tensorstore==0.1.45
+termcolor==2.4.0
+terminado==0.18.0
+text-unidecode==1.3
+textblob==0.17.1
+tf-keras==2.15.0
+tf-slim==1.1.0
+thinc==8.2.3
+threadpoolctl==3.3.0
+tifffile==2024.2.12
+tinycss2==1.2.1
+tokenizers==0.15.2
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.1
+torch==2.1.0+cu121
+torchaudio==2.1.0+cu121
+torchdata==0.7.0
+torchsummary==1.5.1
+torchtext==0.16.0
+torchvision==0.16.0+cu121
+tornado==6.3.3
+tqdm==4.66.2
+traitlets==5.7.1
+traittypes==0.2.1
+transformers==4.38.2
+triton==2.1.0
+trl==0.7.11
+tweepy==4.14.0
+typer==0.9.0
+types-pytz==2024.1.0.20240203
+types-setuptools==69.1.0.20240302
+typing_extensions==4.10.0
+tyro==0.7.3
+tzdata==2024.1
+tzlocal==5.2
+uc-micro-py==1.0.3
+uritemplate==4.1.1
+urllib3==2.0.7
+vega-datasets==0.9.0
+wadllib==1.3.6
+wandb==0.16.4
+wasabi==1.1.2
+wcwidth==0.2.13
+weasel==0.3.4
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+wheel==0.42.0
+widgetsnbextension==3.6.6
+wordcloud==1.9.3
+wrapt==1.14.1
+xarray-einstats==0.7.0
+xarray==2023.7.0
+xgboost==2.0.3
+xlrd==2.0.1
+xxhash==3.4.1
+xyzservices==2023.10.1
+yarl==1.9.4
+yellowbrick==1.5
+yfinance==0.2.37
+zict==3.0.0
+zipp==3.17.0

wandb/run-20240310_065024-lx2gw13k/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+    "os": "Linux-6.1.58+-x86_64-with-glibc2.35",
+    "python": "3.10.12",
+    "heartbeatAt": "2024-03-10T06:50:27.883167",
+    "startedAt": "2024-03-10T06:50:24.686000",
+    "docker": null,
+    "cuda": null,
+    "args": [],
+    "state": "running",
+    "program": "Final-Solar-train-QLoRA.ipynb",
+    "codePathLocal": null,
+    "colab": "https://colab.research.google.com/notebook#fileId=1cntH6JMHtnqGybNA0Y55Jk1U_HRTWn3M",
+    "host": "17daf5749447",
+    "username": "root",
+    "executable": "/usr/bin/python3",
+    "cpu_count": 1,
+    "cpu_count_logical": 2,
+    "cpu_freq": {
+        "current": 2000.202,
+        "min": 0.0,
+        "max": 0.0
+    },
+    "cpu_freq_per_core": [
+        {
+            "current": 2000.202,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2000.202,
+            "min": 0.0,
+            "max": 0.0
+        }
+    ],
+    "disk": {
+        "/": {
+            "total": 78.1898422241211,
+            "used": 48.04489517211914
+        }
+    },
+    "gpu": "Tesla T4",
+    "gpu_count": 1,
+    "gpu_devices": [
+        {
+            "name": "Tesla T4",
+            "memory_total": 16106127360
+        }
+    ],
+    "memory": {
+        "total": 12.674789428710938
+    }
+}

wandb/run-20240310_065024-lx2gw13k/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"train/loss": 0.6012, "train/grad_norm": 3.3899552822113037, "train/learning_rate": 0.0002, "train/epoch": 1.99, "train/global_step": 2000, "_timestamp": 1710061377.8150547, "_runtime": 7953.062962770462, "_step": 1}

wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20240310_065024-lx2gw13k/logs/debug.log ADDED Viewed

	@@ -0,0 +1,30 @@

+2024-03-10 06:50:24,735 INFO    MainThread:929 [wandb_setup.py:_flush():76] Current SDK version is 0.16.4
+2024-03-10 06:50:24,736 INFO    MainThread:929 [wandb_setup.py:_flush():76] Configure stats pid to 929
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Loading settings from /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/settings
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+2024-03-10 06:50:24,737 INFO    MainThread:929 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+2024-03-10 06:50:24,738 INFO    MainThread:929 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+2024-03-10 06:50:24,738 INFO    MainThread:929 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
+2024-03-10 06:50:24,739 INFO    MainThread:929 [wandb_init.py:_log_setup():526] Logging user logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug.log
+2024-03-10 06:50:24,740 INFO    MainThread:929 [wandb_init.py:_log_setup():527] Logging internal logs to /content/drive/.shortcut-targets-by-id/1VA6x7g-jYQKnbJblLJmHQJesD5-S3Ury/best-one/wandb/run-20240310_065024-lx2gw13k/logs/debug-internal.log
+2024-03-10 06:50:24,740 INFO    MainThread:929 [wandb_init.py:_jupyter_setup():472] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7aa380ba0220>
+2024-03-10 06:50:24,741 INFO    MainThread:929 [wandb_init.py:init():566] calling init triggers
+2024-03-10 06:50:24,741 INFO    MainThread:929 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
+config: {}
+2024-03-10 06:50:24,741 INFO    MainThread:929 [wandb_init.py:init():616] starting backend
+2024-03-10 06:50:24,742 INFO    MainThread:929 [wandb_init.py:init():620] setting up manager
+2024-03-10 06:50:24,749 INFO    MainThread:929 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-03-10 06:50:24,751 INFO    MainThread:929 [wandb_init.py:init():628] backend started and connected
+2024-03-10 06:50:24,833 INFO    MainThread:929 [wandb_run.py:_label_probe_notebook():1295] probe notebook
+2024-03-10 06:50:27,302 INFO    MainThread:929 [wandb_init.py:init():720] updated telemetry
+2024-03-10 06:50:27,312 INFO    MainThread:929 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
+2024-03-10 06:50:27,741 INFO    MainThread:929 [wandb_run.py:_on_init():2262] communicating current version
+2024-03-10 06:50:27,844 INFO    MainThread:929 [wandb_run.py:_on_init():2271] got version response
+2024-03-10 06:50:27,844 INFO    MainThread:929 [wandb_init.py:init():804] starting run threads in backend
+2024-03-10 06:50:28,339 INFO    MainThread:929 [wandb_run.py:_console_start():2241] atexit reg
+2024-03-10 06:50:28,339 INFO    MainThread:929 [wandb_run.py:_redirect():2096] redirect: wrap_raw
+2024-03-10 06:50:28,340 INFO    MainThread:929 [wandb_run.py:_redirect():2161] Wrapping output streams.
+2024-03-10 06:50:28,340 INFO    MainThread:929 [wandb_run.py:_redirect():2186] Redirects installed.
+2024-03-10 06:50:28,343 INFO    MainThread:929 [wandb_init.py:init():847] run started, returning control to user process
+2024-03-10 06:50:28,351 INFO    MainThread:929 [wandb_run.py:_config_callback():1343] config_cb None None {'vocab_size': 48000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 48, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 2, 'eos_token_id': 32000, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Edentns/DataVortexS-10.7B-dpo-v1.11', 'transformers_version': '4.38.2', 'model_type': 'llama', 
'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'float16', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/content/drive/MyDrive/best-one', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 4, 'max_steps': -1, 'lr_scheduler_type': 'constant', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/content/drive/MyDrive/best-one/runs/Mar10_06-48-48_17daf5749447', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/content/drive/MyDrive/best-one', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': '/content/drive/MyDrive/best-one/checkpoint-1000', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}

wandb/run-20240310_065024-lx2gw13k/run-lx2gw13k.wandb ADDED Viewed

Binary file (164 kB). View file