NVIDIA · kevalmorabia97 · Jan 13, 2026 · Jan 13, 2026
diff --git a/tests/gpu/torch/_compress/test_compress.py b/tests/gpu/torch/_compress/test_compress.py
@@ -35,7 +35,7 @@
 
 def test_compress(project_root_path: Path, tmp_path: Path):
     spawn_multiprocess_job(
-        size=torch.cuda.device_count(),
+        size=min(torch.cuda.device_count(), 2),  # assertions configured for at most 2 GPUs
         job=partial(_test_compress_multiprocess_job, project_root_path, tmp_path),
         backend="nccl",
     )
@@ -64,10 +64,9 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran
     #
     # Check assertions
     #
+    # assertions for the score_pruning_activations step 1
+    _assert_score_pruning_activations(puzzle_dir)
     if rank == 0:
-        # assertions for the score_pruning_activations step 1
-        _assert_score_pruning_activations(puzzle_dir)
-
         # assertions for the pruning_ckpts step 2
         assert (puzzle_dir / "ckpts/ffn_256_attn_no_op").exists()
 
@@ -103,20 +102,23 @@ def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, ran
 def _assert_score_pruning_activations(puzzle_dir: Path):
     """Assertions for the score_pruning_activations step 1."""
     rank = dist.rank()
+    size = dist.size()
     rank_filepath = f"pruning/pruning_scores/ffn_iterative/100samples_diverse_mini/rank_{rank}.pth"
     assert (puzzle_dir / rank_filepath).is_file()
 
     pruning_scores = torch.load(puzzle_dir / rank_filepath)
 
     layer_names = list(pruning_scores.keys())
-    assert len(layer_names) == 2
-
-    # Check specific values for layer 0
-    layer_0 = pruning_scores[layer_names[0]]
-    assert layer_0["score"][0].item() == 371
-    assert layer_0["channels_importance_ascending"][0].item() == 140
-
-    # Check specific values for layer 1
-    layer_1 = pruning_scores[layer_names[1]]
-    assert layer_1["score"][0].item() == 269
-    assert layer_1["channels_importance_ascending"][0].item() == 366
+    assert len(layer_names) == 2 // size
+
+    if size == 1 or rank == 0:
+        # Check specific values for layer 0
+        layer_0 = pruning_scores[layer_names[0]]
+        assert layer_0["score"][0].item() == 371
+        assert layer_0["channels_importance_ascending"][0].item() == 140
+
+    if size == 1 or rank == 1:
+        # Check specific values for layer 1
+        layer_1 = pruning_scores[layer_names[1 if size == 1 else 0]]
+        assert layer_1["score"][0].item() == 269
+        assert layer_1["channels_importance_ascending"][0].item() == 366
diff --git a/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py b/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py
@@ -297,40 +297,40 @@ def forward_loop(m):
         # TODO: Simplify it: this unit test is too long,
         # hard to read (the same set of assertions across different test cases with if-else).
 
-        assert len(pruning_scores["activations_per_rank"]) == 1
-        rank_0_activations = pruning_scores["activations_per_rank"][0]
+        assert len(pruning_scores["activations_per_rank"]) == size
+        activations = pruning_scores["activations_per_rank"][rank]
 
         # Test case 1: MHA - pruned ffn/4 (num_attention_heads=8, num_query_groups=8, ffn_div=4)
-        if pruned_ffn_div == 4:
+        if size == 1 and pruned_ffn_div == 4:
             # Layer scores
             _assert_approx(pruning_scores["layer_scores"], {1: 0.028923, 2: 0.046508})
 
             # Validate decoder.layers.0.mlp activations
-            mlp_0_acts = rank_0_activations["decoder.layers.0.mlp"]
+            mlp_0_acts = activations["decoder.layers.0.mlp"]
             _assert_approx(mlp_0_acts.min().item(), 0.000026)
             _assert_approx(mlp_0_acts.max().item(), 0.000729)
             _assert_approx(mlp_0_acts.mean().item(), 0.000201)
 
             # Validate decoder.layers.1.mlp activations
-            mlp_1_acts = rank_0_activations["decoder.layers.1.mlp"]
+            mlp_1_acts = activations["decoder.layers.1.mlp"]
             _assert_approx(mlp_1_acts.min().item(), 0.000022)
             _assert_approx(mlp_1_acts.max().item(), 0.000762)
             _assert_approx(mlp_1_acts.mean().item(), 0.000162)
 
         # Test case 2: GQA - pruned attention/2 (num_attention_heads=8, num_query_groups=4, attention_div=2)
-        elif pruned_num_attention_heads_div == 2 and pruned_ffn_div == 1:
+        elif size == 1 and pruned_num_attention_heads_div == 2 and pruned_ffn_div == 1:
             # Layer scores
             _assert_approx(pruning_scores["layer_scores"], {1: 0.028056, 2: 0.038353})
 
             # Validate decoder.layers.0.self_attention activations
-            attn_0_acts = rank_0_activations["decoder.layers.0.self_attention"]
+            attn_0_acts = activations["decoder.layers.0.self_attention"]
             assert attn_0_acts.shape == torch.Size([hidden_size])
             _assert_approx(attn_0_acts.min().item(), 0.010091)
             _assert_approx(attn_0_acts.max().item(), 0.023826)
             _assert_approx(attn_0_acts.mean().item(), 0.014548)
 
             # Validate decoder.layers.1.self_attention activations
-            attn_1_acts = rank_0_activations["decoder.layers.1.self_attention"]
+            attn_1_acts = activations["decoder.layers.1.self_attention"]
             assert attn_1_acts.shape == torch.Size([hidden_size])
             _assert_approx(attn_1_acts.min().item(), 0.009982)
             _assert_approx(attn_1_acts.max().item(), 0.035644)