21_spectrum_finetuning/spectrum_training.ipynb: 13,739 additions & 347 deletions (large diff not rendered by default)

Binary file added: 22_dpo_alignment_trl_sagemaker/.DS_Store (not shown)
22_dpo_alignment_trl_sagemaker/run_training_job.ipynb: 673 additions & 107,904 deletions (large diff not rendered by default)

Binary file not shown.
Binary file not shown.
@@ -1,24 +1,24 @@
 # Model arguments
-model_name_or_path: Qwen/Qwen3-4B
-tokenizer_name_or_path: Qwen/Qwen3-4B
+model_name_or_path: Qwen/Qwen3-0.6B
+tokenizer_name_or_path: Qwen/Qwen3-0.6B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
-use_liger: false
+#use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/spectrum-Qwen-3-4B
+output_dir: /opt/ml/model/Qwen3-0.6B-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_sft.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 max_seq_length: 2048
 packing: true
 
 # Spectrum arguments
-spectrum_config_path: /home/sagemaker-user/DPO/spectrum-layer/snr_results_Qwen-Qwen3-4B_unfrozenparameters_50percent.yaml
+spectrum_config_path: /opt/ml/input/data/code/spectrum-layer/snr_results_Qwen-Qwen3-0.6B_unfrozenparameters_50percent.yaml
 
 # Training arguments
-num_train_epochs: 50
+num_train_epochs: 5
 per_device_train_batch_size: 4
 gradient_accumulation_steps: 2
 gradient_checkpointing: true
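
Note on the Spectrum config above: spectrum_config_path points at the YAML that Spectrum's SNR analysis emits, listing which parameter groups stay trainable. A minimal sketch of how such a file is typically applied, assuming Spectrum's usual unfrozen_parameters key of regex patterns (the loop below is illustrative, not code from this PR):

import re
import torch
import yaml
from transformers import AutoModelForCausalLM

# Path mirrors the config above (assumed to exist locally).
SPECTRUM_YAML = "spectrum-layer/snr_results_Qwen-Qwen3-0.6B_unfrozenparameters_50percent.yaml"

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", torch_dtype=torch.bfloat16)

with open(SPECTRUM_YAML) as f:
    # Spectrum emits regex patterns for the high-SNR layers it selected.
    patterns = yaml.safe_load(f)["unfrozen_parameters"]

# Freeze everything, then re-enable only parameters matching a pattern.
for name, param in model.named_parameters():
    param.requires_grad = any(re.match(p, name) for p in patterns)

print(f"trainable: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")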
@@ -1,39 +1,39 @@
 # Model arguments
-model_name_or_path: meta-llama/Llama-3.2-3B
-tokenizer_name_or_path: meta-llama/Llama-3.2-3B
+model_name_or_path: Qwen/Qwen3-1.7B
+tokenizer_name_or_path: Qwen/Qwen3-1.7B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
-use_liger: true
+#use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/spectrum-llama-3-2-3B
+output_dir: /opt/ml/model/Qwen3-1.7B-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_sft.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 max_seq_length: 2048
 packing: true
 
 # Spectrum arguments
-spectrum_config_path: /home/sagemaker-user/DPO/spectrum-layer/snr_results_meta-llama-Llama-3.2-3B_unfrozenparameters_50percent.yaml
+spectrum_config_path: /opt/ml/input/data/code/spectrum-layer/snr_results_Qwen-Qwen3-1.7B_unfrozenparameters_50percent.yaml
 
 # Training arguments
-num_train_epochs: 50
-per_device_train_batch_size: 8
+num_train_epochs: 5
+per_device_train_batch_size: 4
 gradient_accumulation_steps: 2
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: true
 learning_rate: 5.0e-5
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 weight_decay: 0.01
 
 # Logging arguments
 logging_strategy: steps
 logging_steps: 5
 report_to:
 - wandb
-save_strategy: "no"
+save_strategy: "no" # "epoch"
 seed: 42
 
 # Hugging Face Hub
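
Both SFT configs now read data from /opt/ml/input/data/dataset/ and write to /opt/ml/model, the paths SageMaker wires up inside the training container: each input channel passed to fit() mounts under /opt/ml/input/data/<channel>, and /opt/ml/model is uploaded to S3 when the job ends. A hedged launcher sketch; the entry point, instance type, image versions, S3 URIs, and role are placeholders, not values from this PR:

from sagemaker.huggingface import HuggingFace
from sagemaker.inputs import TrainingInput

estimator = HuggingFace(
    entry_point="run_sft.py",       # assumed training script name
    instance_type="ml.g5.2xlarge",  # assumed instance
    instance_count=1,
    role="arn:aws:iam::111122223333:role/SageMakerRole",  # placeholder
    transformers_version="4.36",    # pick an available HF DLC combination
    pytorch_version="2.1",
    py_version="py310",
    hyperparameters={"config": "/opt/ml/input/data/code/qwen-0.6b.yaml"},
)

# Channel names become directories under /opt/ml/input/data/<channel>.
estimator.fit({
    "dataset": TrainingInput("s3://my-bucket/when2call/dataset.json"),
    "code": TrainingInput("s3://my-bucket/spectrum-configs/"),
})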

This file was deleted.

This file was deleted.

@@ -1,25 +1,25 @@
 # Model arguments
-model_name_or_path: meta-llama/Llama-3.2-3B
-tokenizer_name_or_path: meta-llama/Llama-3.2-3B-Instruct
+model_name_or_path: /opt/ml/input/model/Qwen3-0.6B-function-calling/
+tokenizer_name_or_path: Qwen/Qwen3-0.6B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 #use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/dpo-llama-3-2-3b-function-calling
+output_dir: /opt/ml/model/sft-dpo-qwen-3-0.6b-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_pref.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 
 # Training arguments
 beta: 0.1
 max_length: 1536
 max_prompt_length: 768
 loss_type: sigmoid # default loss, alternatives: https://huggingface.co/docs/trl/dpo_trainer#loss-functions
-num_train_epochs: 3
+num_train_epochs: 1
 per_device_train_batch_size: 2
-gradient_accumulation_steps: 8
+gradient_accumulation_steps: 2
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: true
@@ -31,11 +31,11 @@ warmup_ratio: 0.03
 logging_strategy: steps
 logging_steps: 5
 report_to:
-- tensorboard
+- wandb
 save_strategy: "no"
 seed: 42
 
 # Hugging Face Hub
 push_to_hub: false
-# hub_model_id: llama-3-1-8b-math-orca-qlora-10k-ep1 # if not defined same as output_dir
+# hub_model_id: # if not defined same as output_dir
 hub_strategy: every_save
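
The DPO stage starts from the SFT checkpoint mounted at /opt/ml/input/model/ and keeps beta: 0.1 with the default sigmoid loss. A minimal sketch of how TRL consumes these values (the script wiring is an assumption, and DPOConfig field names should be checked against the installed TRL version):

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("/opt/ml/input/model/Qwen3-0.6B-function-calling/")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")

# Values mirror the YAML above; with no ref_model passed, DPOTrainer
# clones `model` as the frozen reference policy.
args = DPOConfig(
    output_dir="/opt/ml/model/sft-dpo-qwen-3-0.6b-function-calling",
    beta=0.1,
    max_length=1536,
    max_prompt_length=768,
    loss_type="sigmoid",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    bf16=True,
)

train_dataset = load_dataset("json", data_files="/opt/ml/input/data/dataset/dataset.json", split="train")

trainer = DPOTrainer(model=model, args=args, train_dataset=train_dataset, processing_class=tokenizer)
trainer.train()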
@@ -1,16 +1,16 @@
 # Model arguments
-model_name_or_path: Qwen/Qwen3-4B
-tokenizer_name_or_path: Qwen/Qwen3-4B
+model_name_or_path: /opt/ml/input/model/Qwen3-1.7B-function-calling/
+tokenizer_name_or_path: Qwen/Qwen3-1.7B
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 #use_liger: false
 bf16: true
 tf32: true
-output_dir: runs/dpo-Qwen3-4B-function-calling
+output_dir: /opt/ml/model/sft-dpo-qwen-3-1.7b-function-calling
 
 # Dataset arguments
-dataset_id_or_path: ../DPO/data/nvidia_When2Call_train_pref.json
+dataset_id_or_path: /opt/ml/input/data/dataset/dataset.json
 
 # Training arguments
 beta: 0.1
@@ -19,7 +19,7 @@ max_prompt_length: 768
 loss_type: sigmoid # default loss, alternatives: https://huggingface.co/docs/trl/dpo_trainer#loss-functions
 num_train_epochs: 3
 per_device_train_batch_size: 2
-gradient_accumulation_steps: 8
+gradient_accumulation_steps: 2
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: true
@@ -31,7 +31,7 @@ warmup_ratio: 0.03
 logging_strategy: steps
 logging_steps: 5
 report_to:
-- tensorboard
+- wandb
 save_strategy: "no"
 seed: 42
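
Both DPO configs point dataset_id_or_path at a single dataset.json, which DPOTrainer expects to contain preference pairs. A representative record in TRL's standard prompt/chosen/rejected format; the contents are invented for illustration, loosely following the When2Call tool-calling setup:

import json

# Illustrative preference pair: the chosen response calls a tool,
# the rejected one guesses an answer instead. Field names follow
# TRL's preference-dataset convention.
record = {
    "prompt": "What is the weather in Berlin right now?",
    "chosen": '<tool_call>{"name": "get_weather", "arguments": {"city": "Berlin"}}</tool_call>',
    "rejected": "It is probably sunny in Berlin right now.",
}

with open("dataset.json", "w") as f:
    json.dump([record], f, indent=2)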

This file was deleted.

This file was deleted.
