# config.yaml
# Optimizer settings.
opt:
  # Written with an explicit mantissa dot so that YAML 1.1 loaders (e.g. plain
  # PyYAML) resolve it as a float instead of the string "1e-4".
  lr: 1.0e-4
  betas: [0.9, 0.999]
  lr_decay: 0.995  # per epoch
# Audio and feature-extraction settings.
data:
  # Segment length of the target audio; should be divisible by the total
  # upsampling rate (the product of the `upsamples` list).
  segment_size: 9600
  sample_rate: 16000
  target_sample_rate: 24000
  # Should match 20 ms of source audio (320 samples at 16000 Hz = 20 ms,
  # assuming `sample_rate` is the source rate).
  hop_length: 320
  n_fft: 1280  # for loudness
  win_length: 1280  # for loudness
  f_min: 50  # for f0
  f_max: null  # for f0
# NOTE(review): the flattened source does not show which section owns this key;
# it is left at the top level. Confirm against the code that reads it.
seed: 17
# Model settings: shared dimensions plus generator/discriminator sub-sections.
model:
  feature_dims: [768, 1, 1]  # ppg_dim, f0_dim, loud_dim
  cond_dims: [512, 128]  # spk_emb_dim, z_dim

  generator:
    hidden_dim: 768
    n_blocks: 7
    upsamples: [2, 2, 2, 3, 4, 5]
    channel_divs: [1, 1, 2, 1, 1, 2, 2]

  discriminator:
    out_channels: [16, 64, 256, 1024, 1024, 1024, 1]
    kernels: [15, 41, 41, 41, 41, 5, 3]
    downsamples: [1, 2, 2, 4, 4, 1, 1]

  # NOTE(review): the original nesting of this key is not recoverable from the
  # flattened source; placed at model level here. Confirm whether the consumer
  # reads model.lrelu_slope or model.discriminator.lrelu_slope.
  lrelu_slope: 0.2
# Training-loop settings.
train:
  batch_size: 16
  epochs: 1000
  num_workers: 4
  n_gpu: null  # will be defined on training
  stdout_interval: 20
  checkpoint_interval: 5000
  summary_interval: 100
  validation_interval: 1000
  lambda_adv: 4.0
  grad_norm_clip_value: 1.0
  ckpt_dir: 'ckpts'
  logs_dir: 'logs'
# Distributed-training settings.
# NOTE(review): kept as a top-level section; the flattened source does not show
# whether it was originally nested under `train` — confirm against the consumer.
dist_config:
  dist_backend: nccl
  dist_url: tcp://localhost:54321
  world_size: 1
# Locations of the dataset inputs and the train/test split lists.
dataset:
  wav_dir: 'data/wavs'
  ppg_dir: 'data/ppg'
  f0_dir: 'data/f0'
  loudness_dir: 'data/loudness'
  spk_embs_file: 'data/spk_embs.pt'
  train_list: 'data/train.list'
  test_list: 'data/test.list'
# Hydra runtime override: hydra.run.dir is set to `.` rather than Hydra's
# default per-run output directory.
hydra:
  run:
    dir: .