From fc9cd3272b50f4ee9f18c4ab82c278bbb014d99f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 14 Jul 2024 20:49:03 +0100 Subject: [PATCH] [VPlan] Don't add live-outs for IV phis. Resume and exit values for inductions are currently still created outside of VPlan and independent of the induction recipes. Don't add live-outs for now, as the additional unneeded users can pessimize other analysis. Fixes https://github.com/llvm/llvm-project/issues/98660. --- .../Transforms/Vectorize/LoopVectorize.cpp | 8 + .../AArch64/sve-live-out-pointer-induction.ll | 20 -- .../LoopVectorize/X86/ephemeral-recipes.ll | 308 +----------------- .../LoopVectorize/X86/iv-live-outs.ll | 104 ++++++ .../LoopVectorize/iv_outside_user.ll | 3 +- ...o-fold-tail-by-masking-iv-external-uses.ll | 3 - .../LoopVectorize/pr58811-scev-expansion.ll | 27 -- .../pr59319-loop-access-info-invalidation.ll | 4 - 8 files changed, 116 insertions(+), 361 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7d37d67cde29c1..5520baef7152d9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8693,6 +8693,14 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop, Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB); VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan); + // Exit values for inductions are computed and updated outside of VPlan and + // independent of induction recipes. + // TODO: Compute induction exit values in VPlan, use VPLiveOuts to update + // live-outs. 
+ if ((isa<VPWidenIntOrFpInductionRecipe>(V) && + !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) || + isa<VPWidenPointerInductionRecipe>(V)) + continue; Plan.addLiveOut(&ExitPhi, V); } } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 8b64d7a083662e..071d518599caca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -27,26 +27,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 8, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP18:%.*]] = add [[DOTSPLAT]], [[TMP17]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP18]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP13]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 -; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector [[DOTSPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; 
CHECK-NEXT: [[TMP22:%.*]] = add [[DOTSPLAT6]], [[TMP21]] -; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul [[TMP22]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP7]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() @@ -63,7 +44,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 8 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP35]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll b/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll index 450caccefb7584..8cee513b1802b2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll @@ -8,313 +8,17 @@ define i32 @ephemeral_load_and_compare_iv_used_outside(ptr %start, ptr %end) #0 ; CHECK-LABEL: define i32 @ephemeral_load_and_compare_iv_used_outside( ; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[START1]], [[END2]] -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 128 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 128 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP4]], i32 4, <32 x i1> , <32 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP5]], i32 4, <32 x i1> , <32 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP6]], i32 4, <32 x i1> , <32 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP7]], i32 4, <32 x i1> , <32 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER3]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER4]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]]) -; 
CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]]) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i1> [[TMP8]], i32 2 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <32 x i1> [[TMP8]], i32 3 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i1> [[TMP8]], i32 4 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]]) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i1> [[TMP8]], i32 5 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]]) -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i1> [[TMP8]], i32 6 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP18]]) -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i1> [[TMP8]], i32 7 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP19]]) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i1> [[TMP8]], i32 8 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP20]]) -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP8]], i32 9 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP21]]) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i1> [[TMP8]], i32 10 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP8]], i32 11 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP23]]) -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i1> [[TMP8]], i32 12 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP24]]) -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP8]], i32 13 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP25]]) -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i1> [[TMP8]], i32 14 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP8]], i32 15 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP27]]) -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i1> [[TMP8]], i32 16 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP28]]) -; 
CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP8]], i32 17 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP29]]) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i1> [[TMP8]], i32 18 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP30]]) -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP8]], i32 19 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP31]]) -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i1> [[TMP8]], i32 20 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP32]]) -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i1> [[TMP8]], i32 21 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP33]]) -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i1> [[TMP8]], i32 22 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP34]]) -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i1> [[TMP8]], i32 23 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP35]]) -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i1> [[TMP8]], i32 24 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP36]]) -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP8]], i32 25 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP37]]) -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i1> [[TMP8]], i32 26 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP38]]) -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <32 x i1> [[TMP8]], i32 27 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP39]]) -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i1> [[TMP8]], i32 28 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP40]]) -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP8]], i32 29 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP41]]) -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i1> [[TMP8]], i32 30 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP42]]) -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i1> [[TMP8]], i32 31 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP43]]) -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP44]]) -; 
CHECK-NEXT: [[TMP45:%.*]] = extractelement <32 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP45]]) -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i1> [[TMP9]], i32 2 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP46]]) -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP9]], i32 3 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP47]]) -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i1> [[TMP9]], i32 4 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP48]]) -; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i1> [[TMP9]], i32 5 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP49]]) -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i1> [[TMP9]], i32 6 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP50]]) -; CHECK-NEXT: [[TMP51:%.*]] = extractelement <32 x i1> [[TMP9]], i32 7 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP51]]) -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i1> [[TMP9]], i32 8 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP52]]) -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i1> [[TMP9]], i32 9 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP53]]) -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i1> [[TMP9]], i32 10 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP54]]) -; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP9]], i32 11 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP55]]) -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i1> [[TMP9]], i32 12 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP56]]) -; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP9]], i32 13 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP57]]) -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i1> [[TMP9]], i32 14 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP58]]) -; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP9]], i32 15 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP59]]) -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i1> [[TMP9]], i32 16 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP60]]) -; 
CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP9]], i32 17 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP61]]) -; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i1> [[TMP9]], i32 18 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP62]]) -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i1> [[TMP9]], i32 19 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP63]]) -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i1> [[TMP9]], i32 20 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP64]]) -; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i1> [[TMP9]], i32 21 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP65]]) -; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i1> [[TMP9]], i32 22 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP66]]) -; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP9]], i32 23 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP67]]) -; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i1> [[TMP9]], i32 24 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP68]]) -; CHECK-NEXT: [[TMP69:%.*]] = extractelement <32 x i1> [[TMP9]], i32 25 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP69]]) -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i1> [[TMP9]], i32 26 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP70]]) -; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i1> [[TMP9]], i32 27 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP71]]) -; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i1> [[TMP9]], i32 28 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP72]]) -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i1> [[TMP9]], i32 29 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP73]]) -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i1> [[TMP9]], i32 30 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP74]]) -; CHECK-NEXT: [[TMP75:%.*]] = extractelement <32 x i1> [[TMP9]], i32 31 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP75]]) -; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP76]]) -; 
CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP77]]) -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <32 x i1> [[TMP10]], i32 2 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP78]]) -; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i1> [[TMP10]], i32 3 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP79]]) -; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i1> [[TMP10]], i32 4 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP80]]) -; CHECK-NEXT: [[TMP81:%.*]] = extractelement <32 x i1> [[TMP10]], i32 5 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP81]]) -; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i1> [[TMP10]], i32 6 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP82]]) -; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i1> [[TMP10]], i32 7 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP83]]) -; CHECK-NEXT: [[TMP84:%.*]] = extractelement <32 x i1> [[TMP10]], i32 8 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP84]]) -; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i1> [[TMP10]], i32 9 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP85]]) -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i1> [[TMP10]], i32 10 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP86]]) -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <32 x i1> [[TMP10]], i32 11 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP87]]) -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i1> [[TMP10]], i32 12 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP88]]) -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i1> [[TMP10]], i32 13 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP89]]) -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <32 x i1> [[TMP10]], i32 14 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP90]]) -; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i1> [[TMP10]], i32 15 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP91]]) -; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i1> [[TMP10]], i32 16 -; CHECK-NEXT: call void @llvm.assume(i1 
[[TMP92]]) -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <32 x i1> [[TMP10]], i32 17 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP93]]) -; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i1> [[TMP10]], i32 18 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP94]]) -; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i1> [[TMP10]], i32 19 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP95]]) -; CHECK-NEXT: [[TMP96:%.*]] = extractelement <32 x i1> [[TMP10]], i32 20 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP96]]) -; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i1> [[TMP10]], i32 21 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP97]]) -; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i1> [[TMP10]], i32 22 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP98]]) -; CHECK-NEXT: [[TMP99:%.*]] = extractelement <32 x i1> [[TMP10]], i32 23 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP99]]) -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <32 x i1> [[TMP10]], i32 24 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP100]]) -; CHECK-NEXT: [[TMP101:%.*]] = extractelement <32 x i1> [[TMP10]], i32 25 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP101]]) -; CHECK-NEXT: [[TMP102:%.*]] = extractelement <32 x i1> [[TMP10]], i32 26 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP102]]) -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <32 x i1> [[TMP10]], i32 27 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP103]]) -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <32 x i1> [[TMP10]], i32 28 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP104]]) -; CHECK-NEXT: [[TMP105:%.*]] = extractelement <32 x i1> [[TMP10]], i32 29 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP105]]) -; CHECK-NEXT: [[TMP106:%.*]] = extractelement <32 x i1> [[TMP10]], i32 30 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP106]]) -; CHECK-NEXT: [[TMP107:%.*]] = extractelement <32 x i1> [[TMP10]], i32 31 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP107]]) -; CHECK-NEXT: [[TMP108:%.*]] = extractelement <32 x i1> [[TMP11]], i32 0 -; 
CHECK-NEXT: call void @llvm.assume(i1 [[TMP108]]) -; CHECK-NEXT: [[TMP109:%.*]] = extractelement <32 x i1> [[TMP11]], i32 1 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP109]]) -; CHECK-NEXT: [[TMP110:%.*]] = extractelement <32 x i1> [[TMP11]], i32 2 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP110]]) -; CHECK-NEXT: [[TMP111:%.*]] = extractelement <32 x i1> [[TMP11]], i32 3 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP111]]) -; CHECK-NEXT: [[TMP112:%.*]] = extractelement <32 x i1> [[TMP11]], i32 4 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP112]]) -; CHECK-NEXT: [[TMP113:%.*]] = extractelement <32 x i1> [[TMP11]], i32 5 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP113]]) -; CHECK-NEXT: [[TMP114:%.*]] = extractelement <32 x i1> [[TMP11]], i32 6 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP114]]) -; CHECK-NEXT: [[TMP115:%.*]] = extractelement <32 x i1> [[TMP11]], i32 7 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP115]]) -; CHECK-NEXT: [[TMP116:%.*]] = extractelement <32 x i1> [[TMP11]], i32 8 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP116]]) -; CHECK-NEXT: [[TMP117:%.*]] = extractelement <32 x i1> [[TMP11]], i32 9 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP117]]) -; CHECK-NEXT: [[TMP118:%.*]] = extractelement <32 x i1> [[TMP11]], i32 10 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP118]]) -; CHECK-NEXT: [[TMP119:%.*]] = extractelement <32 x i1> [[TMP11]], i32 11 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP119]]) -; CHECK-NEXT: [[TMP120:%.*]] = extractelement <32 x i1> [[TMP11]], i32 12 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP120]]) -; CHECK-NEXT: [[TMP121:%.*]] = extractelement <32 x i1> [[TMP11]], i32 13 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP121]]) -; CHECK-NEXT: [[TMP122:%.*]] = extractelement <32 x i1> [[TMP11]], i32 14 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP122]]) -; CHECK-NEXT: [[TMP123:%.*]] = extractelement <32 x i1> [[TMP11]], i32 15 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP123]]) -; CHECK-NEXT: [[TMP124:%.*]] = 
extractelement <32 x i1> [[TMP11]], i32 16 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP124]]) -; CHECK-NEXT: [[TMP125:%.*]] = extractelement <32 x i1> [[TMP11]], i32 17 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP125]]) -; CHECK-NEXT: [[TMP126:%.*]] = extractelement <32 x i1> [[TMP11]], i32 18 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP126]]) -; CHECK-NEXT: [[TMP127:%.*]] = extractelement <32 x i1> [[TMP11]], i32 19 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP127]]) -; CHECK-NEXT: [[TMP128:%.*]] = extractelement <32 x i1> [[TMP11]], i32 20 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP128]]) -; CHECK-NEXT: [[TMP129:%.*]] = extractelement <32 x i1> [[TMP11]], i32 21 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP129]]) -; CHECK-NEXT: [[TMP130:%.*]] = extractelement <32 x i1> [[TMP11]], i32 22 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP130]]) -; CHECK-NEXT: [[TMP131:%.*]] = extractelement <32 x i1> [[TMP11]], i32 23 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP131]]) -; CHECK-NEXT: [[TMP132:%.*]] = extractelement <32 x i1> [[TMP11]], i32 24 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP132]]) -; CHECK-NEXT: [[TMP133:%.*]] = extractelement <32 x i1> [[TMP11]], i32 25 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP133]]) -; CHECK-NEXT: [[TMP134:%.*]] = extractelement <32 x i1> [[TMP11]], i32 26 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP134]]) -; CHECK-NEXT: [[TMP135:%.*]] = extractelement <32 x i1> [[TMP11]], i32 27 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP135]]) -; CHECK-NEXT: [[TMP136:%.*]] = extractelement <32 x i1> [[TMP11]], i32 28 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP136]]) -; CHECK-NEXT: [[TMP137:%.*]] = extractelement <32 x i1> [[TMP11]], i32 29 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP137]]) -; CHECK-NEXT: [[TMP138:%.*]] = extractelement <32 x i1> [[TMP11]], i32 30 -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP138]]) -; CHECK-NEXT: [[TMP139:%.*]] = extractelement <32 x i1> [[TMP11]], i32 31 -; CHECK-NEXT: call void 
@llvm.assume(i1 [[TMP139]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 -1024 -; CHECK-NEXT: [[TMP140:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP140]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP141:%.*]] = mul i64 [[CMO]], -8 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP141]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = getelementptr nusw i8, ptr [[IV]], i64 -8 ; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[IV]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[L1]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]] -; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi ptr [ [[IV]], %[[LOOP]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi ptr [ [[IV]], %[[LOOP]] ] ; CHECK-NEXT: [[FINAL_LOAD:%.*]] = load i32, ptr [[IV_LCSSA]], align 4 ; CHECK-NEXT: ret i32 [[FINAL_LOAD]] ; @@ -375,9 +79,3 @@ exit: declare void @llvm.assume(i1 noundef) attributes #0 = { "target-cpu"="skylake-avx512" } -;. 
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll new file mode 100644 index 00000000000000..738836d10c5a8c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define i64 @test_pr98660(ptr %dst, i64 %N) { +; CHECK-LABEL: define i64 @test_pr98660( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 24 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr 
i32, ptr [[DST]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i32 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 24 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD1]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD2]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD3]], zeroinitializer +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP13]], i32 4, <8 x i1> [[TMP17]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP14]], i32 4, <8 x i1> [[TMP18]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP15]], i32 4, <8 x i1> [[TMP19]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <8 x i1> [[TMP20]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[N_VEC]], 1 +; CHECK-NEXT: br i1 [[CMP_N]], 
label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[OR]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; CHECK: [[THEN]]: +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RET:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RET]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %or = or disjoint i64 %iv, 1 + %gep = getelementptr i32, ptr %dst, i64 %or + %l = load i32, ptr %gep + %c = icmp eq i32 %l, 0 + br i1 %c, label %then, label %loop.latch + +then: + store i32 0, ptr %gep, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 1 + %ec = icmp ult i64 %iv, %N + br i1 %ec, label %loop.header, label %exit + +exit: + %ret = phi i64 [ %iv, %loop.latch ] + ret i64 %ret +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 93056ad209bf7a..bf27c146ec9ce1 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -202,7 +202,6 @@ exit: ; %iv.2 is dead in the vector loop and only used outside the loop. -; FIXME: Scalar steps for iv.2 are not removed at the moment. define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; CHECK-LABEL: @iv_2_dead_in_loop_only_used_outside ; CHECK-LABEL: vector.body: @@ -210,7 +209,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-NEXT: [[VEC_IND:%.+]] = phi <2 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ] ; CHECK: [[IV_0:%.+]] = add i64 [[INDEX]], 0 ; VEC-NOT: add i64 [[INDEX]], 1 -; CHECK: [[IV_2_0:%.+]] = add i32 %offset.idx, 0 +; CHECK-NOT: add i32 %offset.idx, 0 ; CHECK-LABEL: scalar.ph: ; CHECK-NEXT: {{.+}} = phi i64 [ 1002, %middle.block ], [ 0, %entry ] ; CHECK-NEXT: {{.+}} = phi i32 [ 2004, %middle.block ], [ 0, %entry ] diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index 49058e443d6638..80a6bb50ca91b6 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -42,9 +42,6 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP13]], -1 ; CHECK-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll index 64fdefbb7cb670..c0eb4ccdd6d7e5 100644 --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -27,15 +27,6 @@ define void @test1_pr58811() { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -131,15 +122,6 @@ define void @test2_pr58811() { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: 
[[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]] -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -218,15 +200,6 @@ define void @test3_pr58811() { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 0, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 1, [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 2, [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 3, [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll index f3885b0b100e80..afb7d87bd17528 100644 --- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll @@ -16,10 +16,6 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]