Skip to content

Commit 4ea8157

Browse files
committed
Revert "[VPlan] Remove legacy costing inside VPBlendRecipe::computeCost (#171846)"
This reverts commit fd5f53a. The reverted change triggers legacy cost model assertions, as reported in a comment on #171846.
1 parent 95e4dc6 commit 4ea8157

File tree

3 files changed

+46 additions, -18 deletions (lines changed)

3 files changed

+46 additions, -18 deletions (lines changed)

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7081,11 +7081,6 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
70817081
return true;
70827082
}
70837083

7084-
// The legacy cost model costs non-header phis with a scalar VF as a phi,
7085-
// but scalar unrolled VPlans will have VPBlendRecipes which emit selects.
7086-
if (VF.isScalar() && isa<VPBlendRecipe>(&R))
7087-
return true;
7088-
70897084
/// If a VPlan transform folded a recipe to one producing a single-scalar,
70907085
/// but the original instruction wasn't uniform-after-vectorization in the
70917086
/// legacy cost model, the legacy cost overestimates the actual cost.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2551,6 +2551,11 @@ void VPVectorPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
25512551

25522552
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
25532553
VPCostContext &Ctx) const {
2554+
// Handle cases where only the first lane is used the same way as the legacy
2555+
// cost model.
2556+
if (vputils::onlyFirstLaneUsed(this))
2557+
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2558+
25542559
Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
25552560
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
25562561
return (getNumIncomingValues() - 1) *

llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll

Lines changed: 41 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -8,25 +8,48 @@ target triple = "x86_64-unknown-linux-gnu"
88
define void @smax_call_uniform(ptr %dst, i64 %x) {
99
; CHECK-LABEL: define void @smax_call_uniform(
1010
; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) {
11-
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: [[ENTRY:.*:]]
1212
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
1313
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
14-
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
15-
; CHECK: [[LOOP_HEADER]]:
16-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
17-
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
18-
; CHECK: [[ELSE]]:
19-
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
20-
; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM]], i64 0)
21-
; CHECK-NEXT: br label %[[LOOP_LATCH]]
22-
; CHECK: [[LOOP_LATCH]]:
23-
; CHECK-NEXT: [[PREDPHI7:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
14+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
15+
; CHECK: [[VECTOR_PH]]:
16+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
17+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
18+
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
19+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
20+
; CHECK: [[VECTOR_BODY]]:
21+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
22+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
23+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
24+
; CHECK: [[PRED_UREM_IF]]:
25+
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
26+
; CHECK: [[PRED_UREM_CONTINUE]]:
27+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
28+
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
29+
; CHECK: [[PRED_UREM_IF1]]:
30+
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]]
31+
; CHECK: [[PRED_UREM_CONTINUE2]]:
32+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
33+
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]]
34+
; CHECK: [[PRED_UREM_IF3]]:
35+
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]]
36+
; CHECK: [[PRED_UREM_CONTINUE4]]:
37+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
38+
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]]
39+
; CHECK: [[PRED_UREM_IF5]]:
40+
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]]
41+
; CHECK: [[PRED_UREM_CONTINUE6]]:
42+
; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0)
43+
; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]]
2444
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
2545
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
2646
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
27-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], 1
47+
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
48+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2849
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
29-
; CHECK-NEXT: br i1 [[TMP20]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
50+
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
51+
; CHECK: [[MIDDLE_BLOCK]]:
52+
; CHECK-NEXT: br label %[[EXIT:.*]]
3053
; CHECK: [[EXIT]]:
3154
; CHECK-NEXT: ret void
3255
;
@@ -58,3 +81,8 @@ exit:
5881
}
5982

6083
declare i64 @llvm.smax.i64(i64, i64)
84+
;.
85+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
86+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
87+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
88+
;.

0 commit comments

Comments
 (0)