@@ -8,25 +8,48 @@ target triple = "x86_64-unknown-linux-gnu"
88define void @smax_call_uniform (ptr %dst , i64 %x ) {
99; CHECK-LABEL: define void @smax_call_uniform(
1010; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) {
11- ; CHECK-NEXT: [[ENTRY:.*]]:
11+ ; CHECK-NEXT: [[ENTRY:.*:]]
1212; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
1313; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
14- ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
15- ; CHECK: [[LOOP_HEADER]]:
16- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
17- ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
18- ; CHECK: [[ELSE]]:
19- ; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
20- ; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM]], i64 0)
21- ; CHECK-NEXT: br label %[[LOOP_LATCH]]
22- ; CHECK: [[LOOP_LATCH]]:
23- ; CHECK-NEXT: [[PREDPHI7:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
14+ ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
15+ ; CHECK: [[VECTOR_PH]]:
16+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
17+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
18+ ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
19+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
20+ ; CHECK: [[VECTOR_BODY]]:
21+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
22+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
23+ ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
24+ ; CHECK: [[PRED_UREM_IF]]:
25+ ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
26+ ; CHECK: [[PRED_UREM_CONTINUE]]:
27+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
28+ ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
29+ ; CHECK: [[PRED_UREM_IF1]]:
30+ ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]]
31+ ; CHECK: [[PRED_UREM_CONTINUE2]]:
32+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
33+ ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]]
34+ ; CHECK: [[PRED_UREM_IF3]]:
35+ ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]]
36+ ; CHECK: [[PRED_UREM_CONTINUE4]]:
37+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
38+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]]
39+ ; CHECK: [[PRED_UREM_IF5]]:
40+ ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]]
41+ ; CHECK: [[PRED_UREM_CONTINUE6]]:
42+ ; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0)
43+ ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]]
2444; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
2545; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
2646; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
27- ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], 1
47+ ; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
48+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2849; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
29- ; CHECK-NEXT: br i1 [[TMP20]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
50+ ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
51+ ; CHECK: [[MIDDLE_BLOCK]]:
52+ ; CHECK-NEXT: br label %[[EXIT:.*]]
3053; CHECK: [[EXIT]]:
3154; CHECK-NEXT: ret void
3255;
5881}
5982
6083declare i64 @llvm.smax.i64 (i64 , i64 )
84+ ;.
85+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
86+ ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
87+ ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
88+ ;.
0 commit comments