diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c6e90e57e46edb..bcb60c65629670 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -25,6 +25,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -1758,6 +1759,53 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CmpInst::ICMP_ULT, CostKind); return Cost; } + case Intrinsic::experimental_cttz_elts: { + EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true); + + // If we're not expanding the intrinsic then we assume this is cheap + // to implement. + if (!getTLI()->shouldExpandCttzElements(ArgType)) + return getTypeLegalizationCost(RetTy).first; + + // TODO: The costs below reflect the expansion code in + // SelectionDAGBuilder, but we may want to sacrifice some accuracy in + // favour of compile time. + + // Find the smallest "sensible" element type to use for the expansion. + bool ZeroIsPoison = !cast(Args[1])->isZero(); + ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64)); + if (isa(ICA.getArgTypes()[0]) && I && I->getCaller()) + VScaleRange = getVScaleRange(I->getCaller(), 64); + + unsigned EltWidth = getTLI()->getBitWidthForCttzElements( + RetTy, ArgType.getVectorElementCount(), ZeroIsPoison, &VScaleRange); + Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth); + + // Create the new vector type & get the vector length + Type *NewVecTy = VectorType::get( + NewEltTy, cast(Args[0]->getType())->getElementCount()); + + IntrinsicCostAttributes StepVecAttrs(Intrinsic::experimental_stepvector, + NewVecTy, {}, FMF); + InstructionCost Cost = + thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind); + + Cost += + thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind); + Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy, + Args[0]->getType(), + TTI::CastContextHint::None, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind); + + IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax, + NewEltTy, NewVecTy, FMF, I, 1); + Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind); + + return Cost; + } } // VP Intrinsics should have the same cost as their non-vp counterpart. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 7ed08cfa8a2022..50a8c7eb75af5b 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -470,6 +470,12 @@ class TargetLoweringBase { /// expanded using generic code in SelectionDAGBuilder. virtual bool shouldExpandCttzElements(EVT VT) const { return true; } + /// Return the minimum number of bits required to hold the maximum possible + /// number of trailing zero vector elements. + unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, + bool ZeroIsPoison, + const ConstantRange *VScaleRange) const; + // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to // vecreduce(op(x, y)) for the reduction opcode RedOpc. virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 55aed43070dfb6..9ef02a792fd09c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7861,20 +7861,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE); } - // Find the smallest "sensible" element type to use for the expansion. - ConstantRange CR( - APInt(64, OpVT.getVectorElementCount().getKnownMinValue())); - if (OpVT.isScalableVT()) - CR = CR.umul_sat(getVScaleRange(I.getCaller(), 64)); - // If the zero-is-poison flag is set, we can assume the upper limit // of the result is VF-1. - if (!cast(getValue(I.getOperand(1)))->isZero()) - CR = CR.subtract(APInt(64, 1)); - - unsigned EltWidth = I.getType()->getScalarSizeInBits(); - EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits()); - EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8); + bool ZeroIsPoison = + !cast(getValue(I.getOperand(1)))->isZero(); + ConstantRange VScaleRange(1, true); // Dummy value. + if (isa(I.getOperand(0)->getType())) + VScaleRange = getVScaleRange(I.getCaller(), 64); + unsigned EltWidth = TLI.getBitWidthForCttzElements( + I.getType(), OpVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange); MVT NewEltTy = MVT::getIntegerVT(EltWidth); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 75b3f14e962207..09b70cfb72278f 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1048,6 +1048,24 @@ bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, return TM.isNoopAddrSpaceCast(SrcAS, DestAS); } +unsigned TargetLoweringBase::getBitWidthForCttzElements( + Type *RetTy, ElementCount EC, bool ZeroIsPoison, + const ConstantRange *VScaleRange) const { + // Find the smallest "sensible" element type to use for the expansion. + ConstantRange CR(APInt(64, EC.getKnownMinValue())); + if (EC.isScalable()) + CR = CR.umul_sat(*VScaleRange); + + if (ZeroIsPoison) + CR = CR.subtract(APInt(64, 1)); + + unsigned EltWidth = RetTy->getScalarSizeInBits(); + EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits()); + EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8); + + return EltWidth; +} + void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { // If the command-line option was specified, ignore this request. if (!JumpIsExpensiveOverride.getNumOccurrences()) diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll new file mode 100644 index 00000000000000..01dc086d938538 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll @@ -0,0 +1,209 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s + +define void @foo_no_vscale_range() { +; CHECK-LABEL: 'foo_no_vscale_range' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i64.v32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i32.v32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.v16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i64.v32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.v16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) + %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) + %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) + %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 true) + %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) + %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) + %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) + %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) + %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 true) + %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) + + %res.i64.v2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 true) + %res.i64.v4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 true) + %res.i64.v8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 true) + %res.i64.v16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v161(<16 x i1> undef, i1 true) + %res.i64.v32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 true) + %res.i32.v2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 true) + %res.i32.v4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 true) + %res.i32.v8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 true) + %res.i32.v16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v161(<16 x i1> undef, i1 true) + %res.i32.v32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 true) + + %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) + %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) + %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) + %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 false) + %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) + %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) + %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) + %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) + %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 false) + %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) + + %res.i64.v2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 false) + %res.i64.v4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 false) + %res.i64.v8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 false) + %res.i64.v16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v161(<16 x i1> undef, i1 false) + %res.i64.v32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 false) + %res.i32.v2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 false) + %res.i32.v4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 false) + %res.i32.v8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 false) + %res.i32.v16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v161(<16 x i1> undef, i1 false) + %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false) + + ret void +} + + +define void @foo_vscale_range_1_16() vscale_range(1,16) { +; CHECK-LABEL: 'foo_vscale_range_1_16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) + %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) + %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) + %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 true) + %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) + %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) + %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) + %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) + %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 true) + %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) + + %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) + %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) + %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) + %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 false) + %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) + %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) + %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) + %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) + %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 false) + %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) + + ret void +} + +define void @foo_vscale_range_1_16384() vscale_range(1,16384) { +; CHECK-LABEL: 'foo_vscale_range_1_16384' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) + %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) + %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) + %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 true) + %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) + %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) + %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) + %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) + %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 true) + %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) + + %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) + %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) + %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) + %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 false) + %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) + %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) + %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) + %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) + %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 false) + %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) + + ret void +} + +declare i64 @llvm.experimental.cttz.elts.i64.nxv2i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv4i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv8i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv16i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv32i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1>, i1) +declare i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1>, i1) +declare i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1>, i1) +declare i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1>, i1) +declare i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1>, i1) + +declare i32 @llvm.experimental.cttz.elts.i32.nxv2i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv4i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv8i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv32i1(, i1) diff --git a/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll b/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll new file mode 100644 index 00000000000000..ca09d027b547ba --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/cttz_elts.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s + +define void @foo_no_vscale_range() { +; CHECK-LABEL: 'foo_no_vscale_range' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 781 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 781 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) + %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) + %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) + %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 true) + %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) + %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 true) + %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 true) + %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) + %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) + %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) + %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 true) + %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) + %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 true) + %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 true) + + %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) + %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) + %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) + %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 false) + %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) + %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 false) + %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 false) + %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) + %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) + %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) + %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 false) + %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) + %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 false) + %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 false) + + ret void +} + + +define void @foo_vscale_range_2_16() vscale_range(2,16) { +; CHECK-LABEL: 'foo_vscale_range_2_16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 true) + %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 true) + %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 true) + %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 true) + %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 true) + %res.i64.nxv64i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 true) + %res.i64.nxv128i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 true) + %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 true) + %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 true) + %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 true) + %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 true) + %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 true) + %res.i32.nxv64i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 true) + %res.i32.nxv128i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 true) + + %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1( undef, i1 false) + %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( undef, i1 false) + %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1( undef, i1 false) + %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv161( undef, i1 false) + %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1( undef, i1 false) + %res.i64.nxv64i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv64i1( undef, i1 false) + %res.i64.nxv128i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv128i1( undef, i1 false) + %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1( undef, i1 false) + %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1( undef, i1 false) + %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1( undef, i1 false) + %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv161( undef, i1 false) + %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1( undef, i1 false) + %res.i32.nxv64i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv64i1( undef, i1 false) + %res.i32.nxv128i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv128i1( undef, i1 false) + + ret void +} + +declare i64 @llvm.experimental.cttz.elts.i64.nxv2i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv4i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv8i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv16i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv32i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv64i1(, i1) +declare i64 @llvm.experimental.cttz.elts.i64.nxv128i1(, i1) + +declare i32 @llvm.experimental.cttz.elts.i32.nxv2i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv4i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv8i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv32i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv64i1(, i1) +declare i32 @llvm.experimental.cttz.elts.i32.nxv128i1(, i1)