Skip to content

Commit ef21740

Browse files
authored
[LoopPeel] Check for onlyAccessesInaccessibleMemory instead of llvm.assume in peelToTurnInvariantLoadsDereferenceable. (#171910)
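Calls that only access inaccessible memory (onlyAccessesInaccessibleMemory) can't alias with a load, so they need not be treated as writes that block peeling. This allows us to ignore more intrinsics than just llvm.assume, e.g. llvm.sideeffect. Follow-up to #171547.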
1 parent 5cdb757 commit ef21740

File tree

2 files changed

+87
-3
lines changed

2 files changed

+87
-3
lines changed

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -447,10 +447,10 @@ static unsigned peelToTurnInvariantLoadsDereferenceable(Loop &L,
447447
const DataLayout &DL = L.getHeader()->getDataLayout();
448448
for (BasicBlock *BB : L.blocks()) {
449449
for (Instruction &I : *BB) {
450-
// Don't consider llvm.assume as writing to memory.
450+
// Calls that only access inaccessible memory can never alias with loads.
451451
if (I.mayWriteToMemory() &&
452-
!(isa<IntrinsicInst>(I) &&
453-
cast<IntrinsicInst>(I).getIntrinsicID() == Intrinsic::assume))
452+
!(isa<CallBase>(I) &&
453+
cast<CallBase>(I).onlyAccessesInaccessibleMemory()))
454454
return 0;
455455

456456
if (LoadUsers.contains(&I))

llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,3 +709,87 @@ unreachable.exit:
709709
call void @foo()
710710
unreachable
711711
}
712+
713+
define i32 @peel_readonly_to_make_loads_derefenceable_sideeffect(ptr %ptr, i32 %N, ptr %inv, i1 %c.1) {
714+
; CHECK-LABEL: @peel_readonly_to_make_loads_derefenceable_sideeffect(
715+
; CHECK-NEXT: entry:
716+
; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]]
717+
; CHECK: loop.header.peel.begin:
718+
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]]
719+
; CHECK: loop.header.peel:
720+
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN_PEEL:%.*]], label [[UNREACHABLE_EXIT1:%.*]]
721+
; CHECK: then.peel:
722+
; CHECK-NEXT: [[I_PEEL:%.*]] = load i32, ptr [[INV:%.*]], align 4
723+
; CHECK-NEXT: call void @llvm.sideeffect()
724+
; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp ult i32 [[I_PEEL]], 2
725+
; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[LOOP_LATCH_PEEL:%.*]], label [[UNREACHABLE_EXIT1]]
726+
; CHECK: loop.latch.peel:
727+
; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i32 1
728+
; CHECK-NEXT: [[LV_PEEL:%.*]] = load i32, ptr [[GEP_PEEL]], align 4
729+
; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i32 0, [[LV_PEEL]]
730+
; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1
731+
; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000
732+
; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
733+
; CHECK: loop.header.peel.next:
734+
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]]
735+
; CHECK: loop.header.peel.next1:
736+
; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
737+
; CHECK: entry.peel.newph:
738+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
739+
; CHECK: loop.header:
740+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
741+
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
742+
; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[UNREACHABLE_EXIT:%.*]]
743+
; CHECK: then:
744+
; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[INV]], align 4
745+
; CHECK-NEXT: call void @llvm.sideeffect()
746+
; CHECK-NEXT: [[C_2:%.*]] = icmp ult i32 [[I]], 2
747+
; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[UNREACHABLE_EXIT]]
748+
; CHECK: loop.latch:
749+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[IV]]
750+
; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[GEP]], align 4
751+
; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
752+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
753+
; CHECK-NEXT: [[C_3:%.*]] = icmp samesign ult i32 [[IV]], 1000
754+
; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
755+
; CHECK: exit.loopexit:
756+
; CHECK-NEXT: [[SUM_NEXT_LCSSA1:%.*]] = phi i32 [ [[SUM_NEXT]], [[LOOP_LATCH]] ]
757+
; CHECK-NEXT: br label [[EXIT]]
758+
; CHECK: exit:
759+
; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_PEEL]], [[LOOP_LATCH_PEEL]] ], [ [[SUM_NEXT_LCSSA1]], [[EXIT_LOOPEXIT]] ]
760+
; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]]
761+
; CHECK: unreachable.exit.loopexit:
762+
; CHECK-NEXT: br label [[UNREACHABLE_EXIT1]]
763+
; CHECK: unreachable.exit:
764+
; CHECK-NEXT: call void @foo()
765+
; CHECK-NEXT: unreachable
766+
;
767+
entry:
768+
br label %loop.header
769+
770+
loop.header:
771+
%iv = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
772+
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch ]
773+
br i1 %c.1, label %then, label %unreachable.exit
774+
775+
then:
776+
%i = load i32, ptr %inv
777+
call void @llvm.sideeffect()
778+
%c.2 = icmp ult i32 %i, 2
779+
br i1 %c.2, label %loop.latch, label %unreachable.exit
780+
781+
loop.latch:
782+
%gep = getelementptr i32, ptr %ptr, i32 %iv
783+
%lv = load i32, ptr %gep
784+
%sum.next = add i32 %sum, %lv
785+
%iv.next = add nuw nsw i32 %iv, 1
786+
%c.3 = icmp ult i32 %iv, 1000
787+
br i1 %c.3, label %loop.header, label %exit
788+
789+
exit:
790+
ret i32 %sum.next
791+
792+
unreachable.exit:
793+
call void @foo()
794+
unreachable
795+
}

0 commit comments

Comments
 (0)