diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7dac303b3b3..bf2d1833515 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4185,12 +4185,15 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, uint64_t Value,
                                        unsigned BitsPerElement) {
   // Signed 16-bit values can be replicated using VREPI.
+  // Mark the constants as opaque or DAGCombiner will convert back to
+  // BUILD_VECTOR.
   int64_t SignedValue = SignExtend64(Value, BitsPerElement);
   if (isInt<16>(SignedValue)) {
     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
                                  SystemZ::VectorBits / BitsPerElement);
-    SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
-                             DAG.getConstant(SignedValue, DL, MVT::i32));
+    SDValue Op = DAG.getNode(
+        SystemZISD::REPLICATE, DL, VecVT,
+        DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
   }
   // See whether rotating the constant left some N places gives a value that
@@ -4206,9 +4209,10 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
       End -= 64 - BitsPerElement;
     MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
                                  SystemZ::VectorBits / BitsPerElement);
-    SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
-                             DAG.getConstant(Start, DL, MVT::i32),
-                             DAG.getConstant(End, DL, MVT::i32));
+    SDValue Op = DAG.getNode(
+        SystemZISD::ROTATE_MASK, DL, VecVT,
+        DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
+        DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
   }
   return SDValue();
@@ -4421,8 +4425,9 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   // priority over other methods below.
   uint64_t Mask = 0;
   if (tryBuildVectorByteMask(BVN, Mask)) {
-    SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                             DAG.getConstant(Mask, DL, MVT::i32));
+    SDValue Op = DAG.getNode(
+        SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+        DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
   }
 
@@ -5605,28 +5610,293 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   return SDValue();
 }
 
+// Return the demanded elements for the OpNo source operand of Op. DemandedElts
+// are for Op.
+static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
+                                    unsigned OpNo) {
+  EVT VT = Op.getValueType();
+  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
+  APInt SrcDemE;
+  unsigned Opcode = Op.getOpcode();
+  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
+    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    switch (Id) {
+    case Intrinsic::s390_vpksh:   // PACKS
+    case Intrinsic::s390_vpksf:
+    case Intrinsic::s390_vpksg:
+    case Intrinsic::s390_vpkshs:  // PACKS_CC
+    case Intrinsic::s390_vpksfs:
+    case Intrinsic::s390_vpksgs:
+    case Intrinsic::s390_vpklsh:  // PACKLS
+    case Intrinsic::s390_vpklsf:
+    case Intrinsic::s390_vpklsg:
+    case Intrinsic::s390_vpklshs: // PACKLS_CC
+    case Intrinsic::s390_vpklsfs:
+    case Intrinsic::s390_vpklsgs:
+      // VECTOR PACK truncates the elements of two source vectors into one.
+      SrcDemE = DemandedElts;
+      if (OpNo == 2)
+        SrcDemE.lshrInPlace(NumElts / 2);
+      SrcDemE = SrcDemE.trunc(NumElts / 2);
+      break;
+    // VECTOR UNPACK extends half the elements of the source vector.
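+    // The HIGH variants use source elements [0, NumElts) and the LOW
+    // variants use source elements [NumElts, 2 * NumElts).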
+    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
+    case Intrinsic::s390_vuphh:
+    case Intrinsic::s390_vuphf:
+    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
+    case Intrinsic::s390_vuplhh:
+    case Intrinsic::s390_vuplhf:
+      SrcDemE = APInt(NumElts * 2, 0);
+      SrcDemE.insertBits(DemandedElts, 0);
+      break;
+    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
+    case Intrinsic::s390_vuplhw:
+    case Intrinsic::s390_vuplf:
+    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
+    case Intrinsic::s390_vupllh:
+    case Intrinsic::s390_vupllf:
+      SrcDemE = APInt(NumElts * 2, 0);
+      SrcDemE.insertBits(DemandedElts, NumElts);
+      break;
+    case Intrinsic::s390_vpdi: {
+      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
+      SrcDemE = APInt(NumElts, 0);
+      if (!DemandedElts[OpNo - 1])
+        break;
+      unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
+      // Demand input element 0 or 1, given by the mask bit value.
+      SrcDemE.setBit((Mask & MaskBit) ? 1 : 0);
+      break;
+    }
+    case Intrinsic::s390_vsldb: {
+      // VECTOR SHIFT LEFT DOUBLE BY BYTE
+      assert(VT == MVT::v16i8 && "Unexpected type.");
+      unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
+      unsigned NumSrc0Els = 16 - FirstIdx;
+      SrcDemE = APInt(NumElts, 0);
+      if (OpNo == 1) {
+        APInt DemEls = DemandedElts.trunc(NumSrc0Els);
+        SrcDemE.insertBits(DemEls, FirstIdx);
+      } else {
+        APInt DemEls = DemandedElts.lshr(NumSrc0Els);
+        SrcDemE.insertBits(DemEls, 0);
+      }
+      break;
+    }
+    case Intrinsic::s390_vperm:
+      SrcDemE = APInt(NumElts, 1);
+      break;
+    default:
+      llvm_unreachable("Unhandled intrinsic.");
+      break;
+    }
+  } else {
+    switch (Opcode) {
+    case SystemZISD::JOIN_DWORDS:
+      // Scalar operand.
+      SrcDemE = APInt(1, 1);
+      break;
+    case SystemZISD::SELECT_CCMASK:
+      SrcDemE = DemandedElts;
+      break;
+    default:
+      llvm_unreachable("Unhandled opcode.");
+      break;
+    }
+  }
+  return SrcDemE;
+}
+
+static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
+                                  const APInt &DemandedElts,
+                                  const SelectionDAG &DAG, unsigned Depth,
+                                  unsigned OpNo) {
+  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
+  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
+  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
+  KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
+  DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
+  DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
+  Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
+  Known.One = LHSKnown.One & RHSKnown.One;
+}
+
 void SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                           KnownBits &Known,
                                                           const APInt &DemandedElts,
                                                           const SelectionDAG &DAG,
                                                           unsigned Depth) const {
-  unsigned BitWidth = Known.getBitWidth();
-  Known.resetAll();
-  switch (Op.getOpcode()) {
-  case SystemZISD::SELECT_CCMASK: {
-    KnownBits TrueKnown(BitWidth), FalseKnown(BitWidth);
-    DAG.computeKnownBits(Op.getOperand(0), TrueKnown, Depth + 1);
-    DAG.computeKnownBits(Op.getOperand(1), FalseKnown, Depth + 1);
-    Known.Zero = TrueKnown.Zero & FalseKnown.Zero;
-    Known.One = TrueKnown.One & FalseKnown.One;
-    break;
+
+  // Intrinsic CC result is returned in the two low bits.
+  unsigned tmp0, tmp1; // not used
+  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
+    Known.Zero.setBitsFrom(2);
+    return;
+  }
+  EVT VT = Op.getValueType();
+  if (Op.getResNo() != 0 || VT == MVT::Untyped)
+    return;
+  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
+          "KnownBits does not match VT in bitwidth");
+  assert ((!VT.isVector() ||
+           (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
+          "DemandedElts does not match VT number of elements");
+  unsigned BitWidth = Known.getBitWidth();
+  unsigned Opcode = Op.getOpcode();
+  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
+    bool IsLogical = false;
+    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    switch (Id) {
+    case Intrinsic::s390_vpksh:   // PACKS
+    case Intrinsic::s390_vpksf:
+    case Intrinsic::s390_vpksg:
+    case Intrinsic::s390_vpkshs:  // PACKS_CC
+    case Intrinsic::s390_vpksfs:
+    case Intrinsic::s390_vpksgs:
+    case Intrinsic::s390_vpklsh:  // PACKLS
+    case Intrinsic::s390_vpklsf:
+    case Intrinsic::s390_vpklsg:
+    case Intrinsic::s390_vpklshs: // PACKLS_CC
+    case Intrinsic::s390_vpklsfs:
+    case Intrinsic::s390_vpklsgs:
+    case Intrinsic::s390_vpdi:
+    case Intrinsic::s390_vsldb:
+    case Intrinsic::s390_vperm:
+      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
+      break;
+    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
+    case Intrinsic::s390_vuplhh:
+    case Intrinsic::s390_vuplhf:
+    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
+    case Intrinsic::s390_vupllh:
+    case Intrinsic::s390_vupllf:
+      IsLogical = true;
+      LLVM_FALLTHROUGH;
+    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
+    case Intrinsic::s390_vuphh:
+    case Intrinsic::s390_vuphf:
+    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
+    case Intrinsic::s390_vuplhw:
+    case Intrinsic::s390_vuplf: {
+      SDValue SrcOp = Op.getOperand(1);
+      unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
+      Known = KnownBits(SrcBitWidth);
+      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
+      DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
+      if (IsLogical) {
+        Known = Known.zext(BitWidth);
+        Known.Zero.setBitsFrom(SrcBitWidth);
+      } else
+        Known = Known.sext(BitWidth);
+      break;
+    }
+    default:
+      break;
+    }
+  } else {
+    switch (Opcode) {
+    case SystemZISD::JOIN_DWORDS:
+    case SystemZISD::SELECT_CCMASK:
+      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
+      break;
+    case SystemZISD::REPLICATE: {
+      SDValue SrcOp = Op.getOperand(0);
+      DAG.computeKnownBits(SrcOp, Known, Depth + 1);
+      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
+        Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
+      break;
+    }
+    default:
+      break;
+    }
   }
-  default:
-    break;
-  }
+  // Known has the width of the source operand(s). Adjust if needed to match
+  // the passed bitwidth.
+  if (Known.getBitWidth() != BitWidth)
+    Known = Known.zextOrTrunc(BitWidth);
+}
+
+static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
+                                        const SelectionDAG &DAG, unsigned Depth,
+                                        unsigned OpNo) {
+  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
+  unsigned LHS =
+      DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
+  if (LHS == 1) return 1; // Early out.
+  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
+  unsigned RHS =
+      DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
+  if (RHS == 1) return 1; // Early out.
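+  // For the PACK intrinsics the source elements are wider than the result
+  // elements; the excess source bits are subtracted from the common
+  // sign-bit count below (never returning less than 1).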
+  unsigned Common = std::min(LHS, RHS);
+  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
+  EVT VT = Op.getValueType();
+  unsigned VTBits = VT.getScalarSizeInBits();
+  if (SrcBitWidth > VTBits) { // PACK
+    unsigned SrcExtraBits = SrcBitWidth - VTBits;
+    if (Common > SrcExtraBits)
+      return (Common - SrcExtraBits);
+    return 1;
+  }
+  assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
+  return Common;
+}
+
+unsigned
+SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    unsigned Depth) const {
+  if (Op.getResNo() != 0)
+    return 1;
+  unsigned Opcode = Op.getOpcode();
+  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
+    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    switch (Id) {
+    case Intrinsic::s390_vpksh:   // PACKS
+    case Intrinsic::s390_vpksf:
+    case Intrinsic::s390_vpksg:
+    case Intrinsic::s390_vpkshs:  // PACKS_CC
+    case Intrinsic::s390_vpksfs:
+    case Intrinsic::s390_vpksgs:
+    case Intrinsic::s390_vpklsh:  // PACKLS
+    case Intrinsic::s390_vpklsf:
+    case Intrinsic::s390_vpklsg:
+    case Intrinsic::s390_vpklshs: // PACKLS_CC
+    case Intrinsic::s390_vpklsfs:
+    case Intrinsic::s390_vpklsgs:
+    case Intrinsic::s390_vpdi:
+    case Intrinsic::s390_vsldb:
+    case Intrinsic::s390_vperm:
+      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
+    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
+    case Intrinsic::s390_vuphh:
+    case Intrinsic::s390_vuphf:
+    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
+    case Intrinsic::s390_vuplhw:
+    case Intrinsic::s390_vuplf: {
+      SDValue PackedOp = Op.getOperand(1);
+      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
+      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
+      EVT VT = Op.getValueType();
+      unsigned VTBits = VT.getScalarSizeInBits();
+      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
+      return Tmp;
+    }
+    default:
+      break;
+    }
+  } else {
+    switch (Opcode) {
+    case SystemZISD::SELECT_CCMASK:
+      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
+    default:
+      break;
+    }
+  }
+
+  return 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index fcb2643b99a..dc53d2e3531 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -499,6 +499,12 @@ public:
                                      const SelectionDAG &DAG,
                                      unsigned Depth = 0) const override;
 
+  /// Determine the number of bits in the operation that are sign bits.
+  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                           const APInt &DemandedElts,
+                                           const SelectionDAG &DAG,
+                                           unsigned Depth) const override;
+
   ISD::NodeType getExtendForAtomicOps() const override {
     return ISD::ANY_EXTEND;
   }
diff --git a/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll b/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll
new file mode 100644
index 00000000000..3bcbbb45581
--- /dev/null
+++ b/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll
@@ -0,0 +1,460 @@
+; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with
+; vector intrinsics.
+; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) + +; PACKS_CC (operand elements are 0): i64 -> i32 +define <4 x i32> @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> , <2 x i64> ) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, + ret <4 x i32> %and +} + +; PACKS_CC (operand elements are 1): i64 -> i32 +; NOTE: The vector AND is optimized away, but vrepig+vpksgs is used instead +; of vrepif. Similarly for more test cases below. +define <4 x i32> @f1() { +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpksgs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> , <2 x i64> ) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, + ret <4 x i32> %and +} + +; PACKS_CC (operand elements are 0): i32 -> i16 +define <8 x i16> @f2() { +; CHECK-LABEL: f2: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> , + <4 x i32> ) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, + ret <8 x i16> %and +} + +; PACKS_CC (operand elements are 1): i32 -> i16 +define <8 x i16> @f3() { +; CHECK-LABEL: f3: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpksfs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> , + <4 x i32> ) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, + ret <8 x i16> %and +} + +; PACKS_CC (operand elements are 0): i16 -> i8 +define <16 x i8> @f4() { +; CHECK-LABEL: f4: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs( + <8 x i16> , + <8 x i16> ) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, + ret <16 x i8> %and +} + +; PACKS_CC (operand elements are 1): i16 -> i8 +define <16 x i8> @f5() { +; CHECK-LABEL: f5: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpkshs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs( + <8 x i16> , + <8 x i16> ) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, + ret <16 x i8> %and +} + +declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) + +; PACKLS_CC (operand elements are 0): i64 -> i32 +define <4 x i32> @f6() { +; CHECK-LABEL: f6: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> , <2 x i64> ) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, + ret <4 x i32> %and +} + +; PACKLS_CC (operand elements are 1): i64 -> i32 +define <4 x i32> @f7() { +; CHECK-LABEL: f7: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpklsgs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> , <2 x i64> ) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, + ret <4 
x i32> %and +} + +; PACKLS_CC (operand elements are 0): i32 -> i16 +define <8 x i16> @f8() { +; CHECK-LABEL: f8: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> , + <4 x i32> ) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, + ret <8 x i16> %and +} + +; PACKLS_CC (operand elements are 1): i32 -> i16 +define <8 x i16> @f9() { +; CHECK-LABEL: f9: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpklsfs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> , + <4 x i32> ) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, + ret <8 x i16> %and +} + +; PACKLS_CC (operand elements are 0): i16 -> i8 +define <16 x i8> @f10() { +; CHECK-LABEL: f10: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs( + <8 x i16> , + <8 x i16> ) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, + ret <16 x i8> %and +} + +; PACKLS_CC (operand elements are 1): i16 -> i8 +define <16 x i8> @f11() { +; CHECK-LABEL: f11: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpklshs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs( + <8 x i16> , + <8 x i16> ) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, + ret <16 x i8> %and +} + +declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>) + +; PACKS (operand elements are 0): i64 -> i32 +define <4 x i32> @f12() { +; CHECK-LABEL: f12: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> , <2 x i64> ) + %and = and <4 x i32> %call, + ret <4 x i32> %and +} + +; PACKS (operand elements are 1): i64 -> i32 +define <4 x i32> @f13() { +; CHECK-LABEL: f13: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpksg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> , <2 x i64> ) + %and = and <4 x i32> %call, + ret <4 x i32> %and +} + +; PACKS (operand elements are 0): i32 -> i16 +define <8 x i16> @f14() { +; CHECK-LABEL: f14: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> , + <4 x i32> ) + %and = and <8 x i16> %call, + ret <8 x i16> %and +} + +; PACKS (operand elements are 1): i32 -> i16 +define <8 x i16> @f15() { +; CHECK-LABEL: f15: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpksf %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> , + <4 x i32> ) + %and = and <8 x i16> %call, + ret <8 x i16> %and +} + +; PACKS (operand elements are 0): i16 -> i8 +define <16 x i8> @f16() { +; CHECK-LABEL: f16: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpksh( + <8 x i16> , + <8 x i16> ) + %and = and <16 x i8> %call, + ret <16 x i8> %and +} + +; PACKS (operand elements are 1): i16 -> i8 +define <16 x i8> @f17() { +; CHECK-LABEL: f17: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpksh %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpksh( + <8 x i16> , + <8 x i16> ) + %and = and <16 x i8> %call, + ret <16 
x i8> %and +} + +declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) + +; PACKLS (operand elements are 0): i64 -> i32 +define <4 x i32> @f18() { +; CHECK-LABEL: f18: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> , <2 x i64> ) + %and = and <4 x i32> %call, + ret <4 x i32> %and +} + +; PACKLS (operand elements are 1): i64 -> i32 +define <4 x i32> @f19() { +; CHECK-LABEL: f19: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpklsg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> , <2 x i64> ) + %and = and <4 x i32> %call, + ret <4 x i32> %and +} + +; PACKLS (operand elements are 0): i32 -> i16 +define <8 x i16> @f20() { +; CHECK-LABEL: f20: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> , + <4 x i32> ) + %and = and <8 x i16> %call, + ret <8 x i16> %and +} + +; PACKLS (operand elements are 1): i32 -> i16 +define <8 x i16> @f21() { +; CHECK-LABEL: f21: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpklsf %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> , + <4 x i32> ) + %and = and <8 x i16> %call, + ret <8 x i16> %and +} + +; PACKLS (operand elements are 0): i16 -> i8 +define <16 x i8> @f22() { +; CHECK-LABEL: f22: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpklsh( + <8 x i16> , + <8 x i16> ) + %and = and <16 x i8> %call, + ret <16 x i8> %and +} + +; PACKLS (operand elements are 1): i16 -> i8 +define <16 x i8> @f23() { +; CHECK-LABEL: f23: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpklsh %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpklsh( + <8 x i16> , + <8 x i16> ) + %and = and <16 x i8> %call, + ret <16 x i8> %and +} + +declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) + +; VPDI (operand elements are 0): +define <2 x i64> @f24() { +; CHECK-LABEL: f24: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> , + <2 x i64> , i32 0) + %res = and <2 x i64> %perm, + ret <2 x i64> %res +} + +; VPDI (operand elements are 1): +define <2 x i64> @f25() { +; CHECK-LABEL: f25: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpdi %v24, %v0, %v0, 0 +; CHECK-NEXT: br %r14 + %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> , + <2 x i64> , i32 0) + %res = and <2 x i64> %perm, + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) + +; VSLDB (operand elements are 0): +define <16 x i8> @f26() { +; CHECK-LABEL: f26: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8> + , <16 x i8> + , + i32 1) + + %res = and <16 x i8> %shfd, + ret <16 x i8> %res +} + +; VSLDB (operand elements are 1): +define <16 x i8> @f27() { +; CHECK-LABEL: f27: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsldb %v24, %v0, %v0, 1 +; CHECK-NEXT: br %r14 + %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8> + , <16 x i8> + , + i32 1) + + %res = and <16 x i8> %shfd, + ret <16 x i8> %res +} + +; Test that intrinsic CC result is recognized. 
+define i32 @f28(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: f28: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: lhi %r2, 0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %res = and i32 %cc, -4 + ret i32 %res +} + +declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) + +; Test VPERM (operand elements are 0): +define <16 x i8> @f29() { +; CHECK-LABEL: f29: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %perm = call <16 x i8> @llvm.s390.vperm( + <16 x i8> , + <16 x i8> , + <16 x i8> ) + %res = and <16 x i8> %perm, + ret <16 x i8> %res +} + +; Test VPERM (operand elements are 1): +define <16 x i8> @f30() { +; CHECK-LABEL: f30: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vrepib %v1, 1 +; CHECK-NEXT: vperm %v24, %v1, %v1, %v0 +; CHECK-NEXT: br %r14 + %perm = call <16 x i8> @llvm.s390.vperm( + <16 x i8> , + <16 x i8> , + <16 x i8> ) + %res = and <16 x i8> %perm, + ret <16 x i8> %res +} diff --git a/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll b/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll new file mode 100644 index 00000000000..1966340adb9 --- /dev/null +++ b/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll @@ -0,0 +1,384 @@ +; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with +; vector intrinsics. +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +declare <8 x i16> @llvm.s390.vuphb(<16 x i8>) +declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>) + +; VUPHB (used operand elements are 0) +define <8 x i16> @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +; VUPHB (used operand elements are 1) +; NOTE: The AND is optimized away, but instead of replicating '1' into <8 x +; i16>, the original vector constant is put in the constant pool and then +; unpacked (repeated in more test cases below). 
+define <8 x i16> @f1() { +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuphb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +; VUPLHB (used operand elements are 0) +define <8 x i16> @f2() { +; CHECK-LABEL: f2: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +; VUPLHB (used operand elements are 1) +define <8 x i16> @f3() { +; CHECK-LABEL: f3: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +declare <4 x i32> @llvm.s390.vuphh(<8 x i16>) +declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>) + +; VUPHH (used operand elements are 0) +define <4 x i32> @f4() { +; CHECK-LABEL: f4: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +; VUPHH (used operand elements are 1) +define <4 x i32> @f5() { +; CHECK-LABEL: f5: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +; VUPLHH (used operand elements are 0) +define <4 x i32> @f6() { +; CHECK-LABEL: f6: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +; VUPLHH (used operand elements are 1) +define <4 x i32> @f7() { +; CHECK-LABEL: f7: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +declare <2 x i64> @llvm.s390.vuphf(<4 x i32>) +declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>) + +; VUPHF (used operand elements are 0) +define <2 x i64> @f8() { +; CHECK-LABEL: f8: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; VUPHF (used operand elements are 1) +define <2 x i64> @f9() { +; CHECK-LABEL: f9: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; VUPLHF (used operand elements are 0) +define <2 x i64> @f10() { +; CHECK-LABEL: f10: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; VUPLHF (used operand elements are 1) +define <2 x i64> @f11() { +; CHECK-LABEL: f11: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +declare <8 x i16> 
@llvm.s390.vuplb(<16 x i8>) +declare <8 x i16> @llvm.s390.vupllb(<16 x i8>) + +; VUPLB (used operand elements are 0) +define <8 x i16> @f12() { +; CHECK-LABEL: f12: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8> + ) + + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +; VUPLB (used operand elements are 1) +define <8 x i16> @f13() { +; CHECK-LABEL: f13: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +; VUPLLB (used operand elements are 0) +define <8 x i16> @f14() { +; CHECK-LABEL: f14: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +; VUPLLB (used operand elements are 1) +define <8 x i16> @f15() { +; CHECK-LABEL: f15: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vupllb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8> + ) + %and = and <8 x i16> %unp, + ret <8 x i16> %and +} + +declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>) +declare <4 x i32> @llvm.s390.vupllh(<8 x i16>) + +; VUPLHW (used operand elements are 0) +define <4 x i32> @f16() { +; CHECK-LABEL: f16: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> + ) + + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +; VUPLHW (used operand elements are 1) +define <4 x i32> @f17() { +; CHECK-LABEL: f17: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhw %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +; VUPLLH (used operand elements are 0) +define <4 x i32> @f18() { +; CHECK-LABEL: f18: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +; VUPLLH (used operand elements are 1) +define <4 x i32> @f19() { +; CHECK-LABEL: f19: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vupllh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16> + ) + %and = and <4 x i32> %unp, + ret <4 x i32> %and +} + +declare <2 x i64> @llvm.s390.vuplf(<4 x i32>) +declare <2 x i64> @llvm.s390.vupllf(<4 x i32>) + +; VUPLF (used operand elements are 0) +define <2 x i64> @f20() { +; CHECK-LABEL: f20: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; VUPLF (used operand elements are 1) +define <2 x i64> @f21() { +; CHECK-LABEL: f21: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; VUPLLF (used operand elements are 0) +define <2 x i64> @f22() { +; CHECK-LABEL: f22: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> 
) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; VUPLLF (used operand elements are 1) +define <2 x i64> @f23() { +; CHECK-LABEL: f23: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vupllf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; Test that signed unpacking of positive elements gives known zeros in high part. +define <2 x i64> @f24() { +; CHECK-LABEL: f24: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; Test that signed unpacking of negative elements gives known ones in high part. +define <2 x i64> @f25() { +; CHECK-LABEL: f25: +; CHECK-LABEL: # %bb.0: +; 61680 = 0xf0f0 +; CHECK-NEXT: vgbm %v24, 61680 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} + +; Test that logical unpacking of negative elements gives known zeros in high part. +define <2 x i64> @f26() { +; CHECK-LABEL: f26: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> ) + %and = and <2 x i64> %unp, + ret <2 x i64> %and +} diff --git a/test/CodeGen/SystemZ/knownbits.ll b/test/CodeGen/SystemZ/knownbits.ll new file mode 100644 index 00000000000..703c0bf9479 --- /dev/null +++ b/test/CodeGen/SystemZ/knownbits.ll @@ -0,0 +1,51 @@ +; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode(). +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +; SystemZISD::REPLICATE +define i32 @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK: vlgvf +; CHECK-NOT: lhi %r2, 0 +; CHECK-NOT: chi %r0, 0 +; CHECK-NOT: lochilh %r2, 1 +; CHECK: br %r14 + %cmp0 = icmp ne <4 x i32> undef, zeroinitializer + %zxt0 = zext <4 x i1> %cmp0 to <4 x i32> + %ext0 = extractelement <4 x i32> %zxt0, i32 3 + br label %exit + +exit: +; The vector icmp+zext involves a REPLICATE of 1's. If KnownBits reflects +; this, DAGCombiner can see that the i32 icmp and zext here are not needed. + %cmp1 = icmp ne i32 %ext0, 0 + %zxt1 = zext i1 %cmp1 to i32 + ret i32 %zxt1 +} + +; SystemZISD::JOIN_DWORDS (and REPLICATE) +define void @f1() { +; The DAG XOR has JOIN_DWORDS and REPLICATE operands. With KnownBits properly set +; for both these nodes, ICMP is used instead of TM during lowering because +; adjustForRedundantAnd() succeeds. +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK-NOT: tmll +; CHECK-NOT: jne +; CHECK: cijlh + %1 = load i16, i16* null, align 2 + %2 = icmp eq i16 %1, 0 + %3 = insertelement <2 x i1> undef, i1 %2, i32 0 + %4 = insertelement <2 x i1> %3, i1 true, i32 1 + %5 = xor <2 x i1> %4, + %6 = extractelement <2 x i1> %5, i32 0 + %7 = or i1 %6, undef + br i1 %7, label %9, label %8 + +;
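+; A minimal extra sketch (no CHECK lines asserted; the exact final instruction
+; sequence is left unspecified here): per the f0 comment above, the vector
+; icmp+zext is lowered using a REPLICATE of 1's, so with its known bits
+; available the scalar 'and' with 1 is expected to be redundant and foldable.
+define i32 @sketch_replicate_known_bits(<4 x i32> %a) {
+  %cmp = icmp ne <4 x i32> %a, zeroinitializer
+  %zxt = zext <4 x i1> %cmp to <4 x i32>
+  %ext = extractelement <4 x i32> %zxt, i32 0
+  %and = and i32 %ext, 1
+  ret i32 %and
+}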