diff --git a/tests/gpu/torch/_compress/test_compress.py b/tests/gpu/torch/_compress/test_compress.py
index 24b8b8b2e..dd6e0eb5a 100644
--- a/tests/gpu/torch/_compress/test_compress.py
+++ b/tests/gpu/torch/_compress/test_compress.py
@@ -35,7 +35,7 @@ def test_compress(project_root_path: Path, tmp_path: Path):
     spawn_multiprocess_job(
-        size=torch.cuda.device_count(),
+        size=min(torch.cuda.device_count(), 2),  # assertions configured for at most 2 GPUs
         job=partial(_test_compress_multiprocess_job, project_root_path, tmp_path),
         backend="nccl",
     )
@@ -64,10 +64,9 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran
     #
     # Check assertions
     #
+    # assertions for the score_pruning_activations step 1
+    _assert_score_pruning_activations(puzzle_dir)
     if rank == 0:
-        # assertions for the score_pruning_activations step 1
-        _assert_score_pruning_activations(puzzle_dir)
-
         # assertions for the pruning_ckpts step 2
         assert (puzzle_dir / "ckpts/ffn_256_attn_no_op").exists()
@@ -103,20 +102,23 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran
 def _assert_score_pruning_activations(puzzle_dir: Path):
     """Assertions for the score_pruning_activations step 1."""
     rank = dist.rank()
+    size = dist.size()
     rank_filepath = f"pruning/pruning_scores/ffn_iterative/100samples_diverse_mini/rank_{rank}.pth"
     assert (puzzle_dir / rank_filepath).is_file()
     pruning_scores = torch.load(puzzle_dir / rank_filepath)
     layer_names = list(pruning_scores.keys())
-    assert len(layer_names) == 2
-
-    # Check specific values for layer 0
-    layer_0 = pruning_scores[layer_names[0]]
-    assert layer_0["score"][0].item() == 371
-    assert layer_0["channels_importance_ascending"][0].item() == 140
-
-    # Check specific values for layer 1
-    layer_1 = pruning_scores[layer_names[1]]
-    assert layer_1["score"][0].item() == 269
-    assert layer_1["channels_importance_ascending"][0].item() == 366
+    assert len(layer_names) == 2 // size
+
+    if size == 1 or rank == 0:
+        # Check specific values for layer 0
+        layer_0 = pruning_scores[layer_names[0]]
+        assert layer_0["score"][0].item() == 371
+        assert layer_0["channels_importance_ascending"][0].item() == 140
+
+    if size == 1 or rank == 1:
+        # Check specific values for layer 1
+        layer_1 = pruning_scores[layer_names[1 if size == 1 else 0]]
+        assert layer_1["score"][0].item() == 269
+        assert layer_1["channels_importance_ascending"][0].item() == 366
diff --git a/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py b/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py
index e58473e8a..46d48ea2b 100644
--- a/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py
+++ b/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py
@@ -297,40 +297,40 @@ def forward_loop(m):
     # TODO: Simplify it: this unit test is too long,
     # hard to read (the same set of assertions across different test cases with if-else).
-    assert len(pruning_scores["activations_per_rank"]) == 1
-    rank_0_activations = pruning_scores["activations_per_rank"][0]
+    assert len(pruning_scores["activations_per_rank"]) == size
+    activations = pruning_scores["activations_per_rank"][rank]

     # Test case 1: MHA - pruned ffn/4 (num_attention_heads=8, num_query_groups=8, ffn_div=4)
-    if pruned_ffn_div == 4:
+    if size == 1 and pruned_ffn_div == 4:
         # Layer scores
         _assert_approx(pruning_scores["layer_scores"], {1: 0.028923, 2: 0.046508})

         # Validate decoder.layers.0.mlp activations
-        mlp_0_acts = rank_0_activations["decoder.layers.0.mlp"]
+        mlp_0_acts = activations["decoder.layers.0.mlp"]
         _assert_approx(mlp_0_acts.min().item(), 0.000026)
         _assert_approx(mlp_0_acts.max().item(), 0.000729)
         _assert_approx(mlp_0_acts.mean().item(), 0.000201)

         # Validate decoder.layers.1.mlp activations
-        mlp_1_acts = rank_0_activations["decoder.layers.1.mlp"]
+        mlp_1_acts = activations["decoder.layers.1.mlp"]
         _assert_approx(mlp_1_acts.min().item(), 0.000022)
         _assert_approx(mlp_1_acts.max().item(), 0.000762)
         _assert_approx(mlp_1_acts.mean().item(), 0.000162)

     # Test case 2: GQA - pruned attention/2 (num_attention_heads=8, num_query_groups=4, attention_div=2)
-    elif pruned_num_attention_heads_div == 2 and pruned_ffn_div == 1:
+    elif size == 1 and pruned_num_attention_heads_div == 2 and pruned_ffn_div == 1:
         # Layer scores
         _assert_approx(pruning_scores["layer_scores"], {1: 0.028056, 2: 0.038353})

         # Validate decoder.layers.0.self_attention activations
-        attn_0_acts = rank_0_activations["decoder.layers.0.self_attention"]
+        attn_0_acts = activations["decoder.layers.0.self_attention"]
         assert attn_0_acts.shape == torch.Size([hidden_size])
         _assert_approx(attn_0_acts.min().item(), 0.010091)
         _assert_approx(attn_0_acts.max().item(), 0.023826)
         _assert_approx(attn_0_acts.mean().item(), 0.014548)

         # Validate decoder.layers.1.self_attention activations
-        attn_1_acts = rank_0_activations["decoder.layers.1.self_attention"]
+        attn_1_acts = activations["decoder.layers.1.self_attention"]
         assert attn_1_acts.shape == torch.Size([hidden_size])
         _assert_approx(attn_1_acts.min().item(), 0.009982)
         _assert_approx(attn_1_acts.max().item(), 0.035644)