-
Notifications
You must be signed in to change notification settings - Fork 1
/
dpo_config_template.yaml
executable file
·45 lines (45 loc) · 1.48 KB
/
dpo_config_template.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Template configuration for a multi-stage DPO training run.
# Replace the placeholder strings (model, dataset paths, output directory)
# with real values before use.

# Base model and output location.
model: "base_model_from_huggingface"
continued_training: false
access_token: null
output_dir: "output_directory"

# Training stages. Each stage carries its own epoch count and a list of
# datasets; every dataset entry points at an accepted/rejected pair.
# NOTE(review): each stage's settings are nested under its `stageN` key and
# each dataset's settings under its `datasetN` key — confirm this matches the
# shape the config loader expects.
stages:
  - stage1:
      epochs: 1.0
      datasets:
        - dataset1:
            accepted: "path to accepted dataset 1"
            rejected: "path to rejected dataset 1"
            n_samples: 800  # Must not be greater than total number of samples
        - dataset2:  # All samples will be used
            accepted: "path to accepted dataset 2"
            rejected: "path to rejected dataset 2"
  - stage2:
      epochs: 0.5
      datasets:
        - dataset3:  # All samples will be used
            accepted: "path to accepted dataset 3"
            rejected: "path to rejected dataset 3"
val_dataset_file: null

# LoRA adapter settings.
lora_r: 64
lora_alpha: 64
lora_dropout: 0  # Any is okay, but 0 is optimized in unsloth
lora_target_modules:
  ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
modules_to_save: ["embed_tokens", "lm_head"]

# Batching and optimization.
micro_batch_size: 1
gradient_accumulation_steps: 64
max_grad_norm: 1.0
learning_rate: 0.0001
lr_scheduler_type: cosine_with_restarts
lr_scheduler_kwargs:
  num_cycles: 2
warmup_steps: 120
max_ctx_len: 8192

# Logging, evaluation and checkpointing intervals.
logging_steps: 5
eval_steps: 50
save_steps: 500
save_total_limit: 8

# Tokenizer handling.
add_imstart_token: false
# Sometimes tokenizer does not work properly with *replaced* eos token,
# this is a workaround
map_eos_to_imend: false

# Memory options.
load_in_4bit: false
# Offload original embed_tokens and lm_head when they are in modules_to_save
cpu_offload_embeddings: true