_name_or_path:
  value: openai/whisper-large-v3
_wandb:
  value:
    cli_version: 0.18.3
    m:
      - "1": train/global_step
        "6":
          - 3
        "7": []
      - "1": train/epoch
        "5": 1
        "6":
          - 1
          - 3
        "7": []
    python_version: 3.12.3
    t:
      "1":
        - 1
        - 5
        - 11
        - 49
        - 51
        - 53
        - 55
        - 71
        - 100
      "2":
        - 1
        - 5
        - 11
        - 49
        - 51
        - 53
        - 55
        - 71
        - 100
      "3":
        - 7
        - 13
        - 19
        - 23
        - 55
        - 62
        - 66
      "4": 3.12.3
      "5": 0.18.3
      "6": 4.46.0.dev0
      "8":
        - 5
      "9":
        "1": transformers_trainer
      "12": 0.18.3
      "13": linux-x86_64
accelerator_config:
  value:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_seedable_sampler: true
activation_dropout:
  value: 0
activation_function:
  value: gelu
adafactor:
  value: false
adam_beta1:
  value: 0.9
adam_beta2:
  value: 0.999
adam_epsilon:
  value: 1e-08
add_cross_attention:
  value: false
apply_spec_augment:
  value: false
architectures:
  value:
    - WhisperForConditionalGeneration
attention_dropout:
  value: 0
auto_find_batch_size:
  value: false
bad_words_ids:
  value: null
batch_eval_metrics:
  value: false
begin_suppress_tokens:
  value:
    - 220
    - 50257
bf16:
  value: false
bf16_full_eval:
  value: false
bos_token_id:
  value: 50257
chunk_size_feed_forward:
  value: 0
classifier_proj_size:
  value: 256
cross_attention_hidden_size:
  value: null
d_model:
  value: 1280
data_seed:
  value: null
dataloader_drop_last:
  value: false
dataloader_num_workers:
  value: 0
dataloader_persistent_workers:
  value: false
dataloader_pin_memory:
  value: true
dataloader_prefetch_factor:
  value: null
ddp_backend:
  value: null
ddp_broadcast_buffers:
  value: null
ddp_bucket_cap_mb:
  value: null
ddp_find_unused_parameters:
  value: null
ddp_timeout:
  value: 1800
debug:
  value: []
decoder_attention_heads:
  value: 20
decoder_ffn_dim:
  value: 5120
decoder_layerdrop:
  value: 0
decoder_layers:
  value: 32
decoder_start_token_id:
  value: 50258
deepspeed:
  value: null
disable_tqdm:
  value: false
dispatch_batches:
  value: null
diversity_penalty:
  value: 0
do_eval:
  value: true
do_predict:
  value: false
do_sample:
  value: false
do_train:
  value: true
dropout:
  value: 0
early_stopping:
  value: false
encoder_attention_heads:
  value: 20
encoder_ffn_dim:
  value: 5120
encoder_layerdrop:
  value: 0
encoder_layers:
  value: 32
encoder_no_repeat_ngram_size:
  value: 0
eos_token_id:
  value: 50257
eval_accumulation_steps:
  value: null
eval_delay:
  value: 0
eval_do_concat_batches:
  value: true
eval_on_start:
  value: false
eval_steps:
  value: 500
eval_strategy:
  value: steps
eval_use_gather_object:
  value: false
evaluation_strategy:
  value: steps
exponential_decay_length_penalty:
  value: null
finetuning_task:
  value: null
forced_bos_token_id:
  value: null
forced_decoder_ids:
  value: null
forced_eos_token_id:
  value: null
fp16:
  value: true
fp16_backend:
  value: auto
fp16_full_eval:
  value: false
fp16_opt_level:
  value: O1
fsdp:
  value: []
fsdp_config:
  value:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
fsdp_min_num_params:
  value: 0
fsdp_transformer_layer_cls_to_wrap:
  value: null
full_determinism:
  value: false
generation_config:
  value: null
generation_max_length:
  value: 228
generation_num_beams:
  value: null
gradient_accumulation_steps:
  value: 1
gradient_checkpointing:
  value: true
gradient_checkpointing_kwargs:
  value: null
greater_is_better:
  value: false
group_by_length:
  value: false
half_precision_backend:
  value: auto
hub_always_push:
  value: false
hub_model_id:
  value: null
hub_private_repo:
  value: false
hub_strategy:
  value: every_save
hub_token:
  value:
id2label:
  value:
    "0": LABEL_0
    "1": LABEL_1
ignore_data_skip:
  value: false
include_for_metrics:
  value: []
include_inputs_for_metrics:
  value: false
include_num_input_tokens_seen:
  value: false
include_tokens_per_second:
  value: false
init_std:
  value: 0.02
is_decoder:
  value: false
is_encoder_decoder:
  value: true
jit_mode_eval:
  value: false
label_names:
  value: null
label_smoothing_factor:
  value: 0
label2id:
  value:
    LABEL_0: 0
    LABEL_1: 1
learning_rate:
  value: 4.375e-06
length_column_name:
  value: input_length
length_penalty:
  value: 1
load_best_model_at_end:
  value: true
local_rank:
  value: 0
log_level:
  value: passive
log_level_replica:
  value: warning
log_on_each_node:
  value: true
logging_dir:
  value: ./runs/Oct07_10-22-04_tknika
logging_first_step:
  value: false
logging_nan_inf_filter:
  value: true
logging_steps:
  value: 25
logging_strategy:
  value: steps
lr_scheduler_type:
  value: linear
mask_feature_length:
  value: 10
mask_feature_min_masks:
  value: 0
mask_feature_prob:
  value: 0
mask_time_length:
  value: 10
mask_time_min_masks:
  value: 2
mask_time_prob:
  value: 0.05
max_grad_norm:
  value: 1
max_length:
  value: 448
max_source_positions:
  value: 1500
max_steps:
  value: 1000
max_target_positions:
  value: 448
median_filter_width:
  value: 7
metric_for_best_model:
  value: wer
min_length:
  value: 0
model/num_parameters:
  value: 1543490560
model_type:
  value: whisper
mp_parameters:
  value: ""
neftune_noise_alpha:
  value: null
no_cuda:
  value: false
no_repeat_ngram_size:
  value: 0
num_beam_groups:
  value: 1
num_beams:
  value: 1
num_hidden_layers:
  value: 32
num_mel_bins:
  value: 128
num_return_sequences:
  value: 1
num_train_epochs:
  value: 3
optim:
  value: adamw_torch
optim_args:
  value: null
optim_target_modules:
  value: null
output_attentions:
  value: false
output_dir:
  value: ./
output_hidden_states:
  value: false
output_scores:
  value: false
overwrite_output_dir:
  value: true
pad_token_id:
  value: 50256
past_index:
  value: -1
per_device_eval_batch_size:
  value: 8
per_device_train_batch_size:
  value: 16
per_gpu_eval_batch_size:
  value: null
per_gpu_train_batch_size:
  value: null
predict_with_generate:
  value: true
prediction_loss_only:
  value: false
prefix:
  value: null
problem_type:
  value: null
push_to_hub:
  value: true
push_to_hub_model_id:
  value: null
push_to_hub_organization:
  value: null
push_to_hub_token:
  value:
ray_scope:
  value: last
remove_invalid_values:
  value: false
remove_unused_columns:
  value: true
repetition_penalty:
  value: 1
report_to:
  value:
    - wandb
restore_callback_states_from_checkpoint:
  value: false
resume_from_checkpoint:
  value: ./checkpoint-9000/
return_dict:
  value: true
return_dict_in_generate:
  value: false
run_name:
  value: whisper-large-eu
save_on_each_node:
  value: false
save_only_model:
  value: false
save_safetensors:
  value: true
save_steps:
  value: 1000
save_strategy:
  value: steps
save_total_limit:
  value: null
scale_embedding:
  value: false
seed:
  value: 42
sep_token_id:
  value: null
skip_memory_metrics:
  value: true
sortish_sampler:
  value: false
split_batches:
  value: null
suppress_tokens:
  value: null
task_specific_params:
  value: null
temperature:
  value: 1
tf_legacy_loss:
  value: false
tf32:
  value: null
tie_encoder_decoder:
  value: false
tie_word_embeddings:
  value: true
tokenizer_class:
  value: null
top_k:
  value: 50
top_p:
  value: 1
torch_compile:
  value: false
torch_compile_backend:
  value: null
torch_compile_mode:
  value: null
torch_dtype:
  value: float16
torch_empty_cache_steps:
  value: null
torchdynamo:
  value: null
torchscript:
  value: false
tpu_metrics_debug:
  value: false
tpu_num_cores:
  value: null
transformers_version:
  value: 4.46.0.dev0
typical_p:
  value: 1
use_bfloat16:
  value: false
use_cache:
  value: false
use_cpu:
  value: false
use_ipex:
  value: false
use_legacy_prediction_loop:
  value: false
use_liger_kernel:
  value: false
use_mps_device:
  value: false
use_weighted_layer_sum:
  value: false
vocab_size:
  value: 51866
warmup_ratio:
  value: 0
warmup_steps:
  value: 500
weight_decay:
  value: 0