21_spectrum_finetuning/spectrum_training.ipynb: 13,739 additions & 347 deletions (large diff not rendered by default)

Binary file added: 22_dpo_alignment_trl_sagemaker/.DS_Store (not shown)
22_dpo_alignment_trl_sagemaker/run_training_job.ipynb: 673 additions & 107,904 deletions (large diff not rendered by default)

Binary file not shown.
Binary file not shown.
@@ -1,24 +1,24 @@
 # Model arguments
-model_name_or_path: Qwen/Qwen3-4B
-tokenizer_name_or_path: Qwen/Qwen3-4B
+model_name_or_path: Qwen/Qwen3-0.6B
+tokenizer_name_or_path: Qwen/Qwen3-0.6B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
-use_liger: false
+#use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/spectrum-Qwen-3-4B
+output_dir: /opt/ml/model/Qwen3-0.6B-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_sft.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 max_seq_length: 2048
 packing: true
 
 # Spectrum arguments
-spectrum_config_path: /home/sagemaker-user/DPO/spectrum-layer/snr_results_Qwen-Qwen3-4B_unfrozenparameters_50percent.yaml
+spectrum_config_path: /opt/ml/input/data/code/spectrum-layer/snr_results_Qwen-Qwen3-0.6B_unfrozenparameters_50percent.yaml
 
 # Training arguments
-num_train_epochs: 50
+num_train_epochs: 5
 per_device_train_batch_size: 4
 gradient_accumulation_steps: 2
 gradient_checkpointing: true
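
Note on the Spectrum config above: spectrum_config_path points at the YAML that Spectrum's SNR analysis emits, listing which parameter groups stay trainable. A minimal sketch of how such a file is typically applied, assuming Spectrum's usual unfrozen_parameters key of regex patterns (the loop below is illustrative, not code from this PR):

import re
import torch
import yaml
from transformers import AutoModelForCausalLM

# Path mirrors the config above (assumed to exist locally).
SPECTRUM_YAML = "spectrum-layer/snr_results_Qwen-Qwen3-0.6B_unfrozenparameters_50percent.yaml"

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", torch_dtype=torch.bfloat16)

with open(SPECTRUM_YAML) as f:
    # Spectrum emits regex patterns for the high-SNR layers it selected.
    patterns = yaml.safe_load(f)["unfrozen_parameters"]

# Freeze everything, then re-enable only parameters matching a pattern.
for name, param in model.named_parameters():
    param.requires_grad = any(re.match(p, name) for p in patterns)

print(f"trainable: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")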
@@ -1,39 +1,39 @@
 # Model arguments
-model_name_or_path: meta-llama/Llama-3.2-3B
-tokenizer_name_or_path: meta-llama/Llama-3.2-3B
+model_name_or_path: Qwen/Qwen3-1.7B
+tokenizer_name_or_path: Qwen/Qwen3-1.7B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
-use_liger: true
+#use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/spectrum-llama-3-2-3B
+output_dir: /opt/ml/model/Qwen3-1.7B-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_sft.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 max_seq_length: 2048
 packing: true
 
 # Spectrum arguments
-spectrum_config_path: /home/sagemaker-user/DPO/spectrum-layer/snr_results_meta-llama-Llama-3.2-3B_unfrozenparameters_50percent.yaml
+spectrum_config_path: /opt/ml/input/data/code/spectrum-layer/snr_results_Qwen-Qwen3-1.7B_unfrozenparameters_50percent.yaml
 
 # Training arguments
-num_train_epochs: 50
-per_device_train_batch_size: 8
+num_train_epochs: 5
+per_device_train_batch_size: 4
 gradient_accumulation_steps: 2
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: true
 learning_rate: 5.0e-5
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 weight_decay: 0.01
 
 # Logging arguments
 logging_strategy: steps
 logging_steps: 5
 report_to:
 - wandb
-save_strategy: "no"
+save_strategy: "no" # "epoch"
 seed: 42
 
 # Hugging Face Hub
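
Both SFT configs now read data from /opt/ml/input/data/dataset/ and write to /opt/ml/model, the paths SageMaker wires up inside the training container: each input channel passed to fit() mounts under /opt/ml/input/data/<channel>, and /opt/ml/model is uploaded to S3 when the job ends. A hedged launcher sketch; the entry point, instance type, image versions, S3 URIs, and role are placeholders, not values from this PR:

from sagemaker.huggingface import HuggingFace
from sagemaker.inputs import TrainingInput

estimator = HuggingFace(
    entry_point="run_sft.py",       # assumed training script name
    instance_type="ml.g5.2xlarge",  # assumed instance
    instance_count=1,
    role="arn:aws:iam::111122223333:role/SageMakerRole",  # placeholder
    transformers_version="4.36",    # pick an available HF DLC combination
    pytorch_version="2.1",
    py_version="py310",
    hyperparameters={"config": "/opt/ml/input/data/code/qwen-0.6b.yaml"},
)

# Channel names become directories under /opt/ml/input/data/<channel>.
estimator.fit({
    "dataset": TrainingInput("s3://my-bucket/when2call/dataset.json"),
    "code": TrainingInput("s3://my-bucket/spectrum-configs/"),
})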

This file was deleted.

This file was deleted.

@@ -1,25 +1,25 @@
 # Model arguments
-model_name_or_path: meta-llama/Llama-3.2-3B
-tokenizer_name_or_path: meta-llama/Llama-3.2-3B-Instruct
+model_name_or_path: /opt/ml/input/model/Qwen3-0.6B-function-calling/
+tokenizer_name_or_path: Qwen/Qwen3-0.6B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 #use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/dpo-llama-3-2-3b-function-calling
+output_dir: /opt/ml/model/sft-dpo-qwen-3-0.6b-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_pref.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 
 # Training arguments
 beta: 0.1
 max_length: 1536
 max_prompt_length: 768
 loss_type: sigmoid # default loss, alternatives: https://huggingface.co/docs/trl/dpo_trainer#loss-functions
-num_train_epochs: 3
+num_train_epochs: 1
 per_device_train_batch_size: 2
-gradient_accumulation_steps: 8
+gradient_accumulation_steps: 2
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: true
@@ -31,11 +31,11 @@ warmup_ratio: 0.03
 logging_strategy: steps
 logging_steps: 5
 report_to:
-- tensorboard
+- wandb
 save_strategy: "no"
 seed: 42
 
 # Hugging Face Hub
 push_to_hub: false
-# hub_model_id: llama-3-1-8b-math-orca-qlora-10k-ep1 # if not defined same as output_dir
+# hub_model_id: # if not defined same as output_dir
 hub_strategy: every_save
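
The DPO stage starts from the SFT checkpoint mounted at /opt/ml/input/model/ and keeps beta: 0.1 with the default sigmoid loss. A minimal sketch of how TRL consumes these values (the script wiring is an assumption, and DPOConfig field names should be checked against the installed TRL version):

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("/opt/ml/input/model/Qwen3-0.6B-function-calling/")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")

# Values mirror the YAML above; with no ref_model passed, DPOTrainer
# clones `model` as the frozen reference policy.
args = DPOConfig(
    output_dir="/opt/ml/model/sft-dpo-qwen-3-0.6b-function-calling",
    beta=0.1,
    max_length=1536,
    max_prompt_length=768,
    loss_type="sigmoid",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    bf16=True,
)

train_dataset = load_dataset("json", data_files="/opt/ml/input/data/dataset/dataset.json", split="train")

trainer = DPOTrainer(model=model, args=args, train_dataset=train_dataset, processing_class=tokenizer)
trainer.train()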
@@ -1,16 +1,16 @@
 # Model arguments
-model_name_or_path: Qwen/Qwen3-4B
-tokenizer_name_or_path: Qwen/Qwen3-4B
+model_name_or_path: /opt/ml/input/model/Qwen3-1.7B-function-calling/
+tokenizer_name_or_path: Qwen/Qwen3-1.7B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 #use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/dpo-Qwen3-4B-function-calling
+output_dir: /opt/ml/model/sft-dpo-qwen-3-1.7b-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_pref.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 
 # Training arguments
 beta: 0.1
@@ -19,7 +19,7 @@ max_prompt_length: 768
 loss_type: sigmoid # default loss, alternatives: https://huggingface.co/docs/trl/dpo_trainer#loss-functions
 num_train_epochs: 3
 per_device_train_batch_size: 2
-gradient_accumulation_steps: 8
+gradient_accumulation_steps: 2
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: true
@@ -31,7 +31,7 @@ warmup_ratio: 0.03
 logging_strategy: steps
 logging_steps: 5
 report_to:
-- tensorboard
+- wandb
 save_strategy: "no"
 seed: 42
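
Both DPO configs point dataset_id_or_path at a single dataset.json, which DPOTrainer expects to contain preference pairs. A representative record in TRL's standard prompt/chosen/rejected format; the contents are invented for illustration, loosely following the When2Call tool-calling setup:

import json

# Illustrative preference pair: the chosen response calls a tool,
# the rejected one guesses an answer instead. Field names follow
# TRL's preference-dataset convention.
record = {
    "prompt": "What is the weather in Berlin right now?",
    "chosen": '<tool_call>{"name": "get_weather", "arguments": {"city": "Berlin"}}</tool_call>',
    "rejected": "It is probably sunny in Berlin right now.",
}

with open("dataset.json", "w") as f:
    json.dump([record], f, indent=2)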

This file was deleted.

This file was deleted.
