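# NOTE(review): the six lines below are residue from the hosting site's file
# viewer, not configuration. They are kept as comments so the file parses.
# annapurnapadmaprema-ji's picture
# Upload 278 files
# 2631d60 verified
# raw
# history blame
# 1.29 kB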
# @package __global__
# Classifier-free guidance settings.
classifier_free_guidance:
  # Dropout value used while training.
  # NOTE(review): presumably the probability of dropping the conditioning
  # for a training sample - confirm against the consuming code.
  training_dropout: 0.1
  # Guidance coefficient used at inference time.
  inference_coef: 3.0
# Per-attribute dropout, grouped by attribute type (`text`, `wav`).
# The attribute names used here (`description`, `self_wav`) match the
# conditioner names declared elsewhere in this file.
attribute_dropout:
  args:
    # Leave attribute dropout switched off during evaluation.
    active_on_eval: false
  text:
    # Dropout value for the `description` text attribute.
    description: 0.4
  wav:
    # Dropout value for the `self_wav` waveform attribute.
    self_wav: 0.4
# Condition fuser: each list below names the conditions combined through that
# fusing method. Only `prepend` is populated; the other methods are unused.
fuser:
  cross_attention_pos_emb: false
  cross_attention_pos_emb_scale: 1
  sum: []
  # Both conditions declared in this file are fused via `prepend`.
  prepend: [self_wav, description]
  cross: []
  input_interpolate: []
# Conditioner definitions. Each entry selects an implementation through
# `model` and configures it in the sub-mapping named after that model.
conditioners:
  # Audio-based conditioner using the `style` model.
  self_wav:
    model: style
    style:
      model_name: mert
      transformer_scale: default
      # Resolved from the `sample_rate` key, which is not defined in the
      # visible part of this file.
      sample_rate: ${sample_rate}
      encodec_checkpoint: '//pretrained/facebook/encodec_32khz'
      encodec_n_q: 3
      # NOTE(review): presumably an excerpt length in seconds - confirm.
      length: 3.0
      ds_factor: 15  # MERT runs at 75 Hz, so 75 / 15 gives 5 Hz representations
      n_q_out: 6
      eval_q: 3
      q_dropout: true
      bins: 1024
      # NOTE(review): looks like a [min, max] range around `length` - confirm.
      varying_lengths: [1.5, 4.5]
      batch_norm: true
      compute_mask: true
      # Resolved from `transformer_lm.n_q`, which is not defined in the
      # visible part of this file.
      num_codebooks_lm: ${transformer_lm.n_q}
      ds_rate_compression: 640
      use_middle_of_segment: false
      rvq_threshold_ema_dead_code: 0.1
  # Text conditioner using the `t5` model.
  description:
    model: t5
    t5:
      name: t5-base
      finetune: false
      word_dropout: 0.2
      normalize_text: false
# Dataset overrides for the training split.
dataset:
  train:
    # NOTE(review): the `_p` suffix suggests these three are probabilities
    # applied to text metadata - confirm against the dataset implementation.
    merge_text_p: 0.25
    drop_desc_p: 0.5
    drop_other_p: 0.5
    # NOTE(review): the original nesting was lost; `shuffle` is assumed to
    # belong to the `train` split rather than to `dataset` itself - confirm.
    shuffle: true