1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[SystemZ] computeKnownBitsForTargetNode() / ComputeNumSignBitsForTargetNode()

Improve/implement these methods to strengthen DAG combining, mainly for
the SystemZ vector intrinsics.

Some constant operands to SystemZISD nodes have been marked Opaque to avoid
transforming back and forth between generic and target nodes infinitely.

Review: Ulrich Weigand
llvm-svn: 327765
This commit is contained in:
Jonas Paulsson 2018-03-17 08:32:12 +00:00
parent cebc088508
commit 3203edf16f
9 changed files with 1560 additions and 21 deletions

View File

@ -4185,12 +4185,15 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
const SDLoc &DL, EVT VT, uint64_t Value,
unsigned BitsPerElement) {
// Signed 16-bit values can be replicated using VREPI.
// Mark the constants as opaque or DAGCombiner will convert back to
// BUILD_VECTOR.
int64_t SignedValue = SignExtend64(Value, BitsPerElement);
if (isInt<16>(SignedValue)) {
MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
SystemZ::VectorBits / BitsPerElement);
SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
DAG.getConstant(SignedValue, DL, MVT::i32));
SDValue Op = DAG.getNode(
SystemZISD::REPLICATE, DL, VecVT,
DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
// See whether rotating the constant left some N places gives a value that
@ -4206,9 +4209,10 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
End -= 64 - BitsPerElement;
MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
SystemZ::VectorBits / BitsPerElement);
SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
DAG.getConstant(Start, DL, MVT::i32),
DAG.getConstant(End, DL, MVT::i32));
SDValue Op = DAG.getNode(
SystemZISD::ROTATE_MASK, DL, VecVT,
DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
return SDValue();
@ -4421,8 +4425,9 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
// priority over other methods below.
uint64_t Mask = 0;
if (tryBuildVectorByteMask(BVN, Mask)) {
SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
DAG.getConstant(Mask, DL, MVT::i32));
SDValue Op = DAG.getNode(
SystemZISD::BYTE_MASK, DL, MVT::v16i8,
DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
@ -5605,28 +5610,293 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
// Return the demanded elements for the OpNo source operand of Op. DemandedElts
// are for Op.
//
// Maps "which elements of Op's result are demanded" back onto "which elements
// of source operand OpNo are needed to produce them", per the semantics of
// each SystemZ vector intrinsic / node.  OpNo is 1-based for intrinsics
// (operand 0 is the intrinsic ID) and 0-based for SystemZISD nodes.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      // Operand 1 supplies the high half of the result, operand 2 the low
      // half, so shift the demanded mask down for operand 2 before narrowing
      // it to the source element count.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(NumElts / 2);
      SrcDemE = SrcDemE.trunc(NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      // HIGH forms read the low-indexed (leftmost) half of the source.
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      // LOW forms read the high-indexed (rightmost) half of the source.
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
      break;
    }
    case Intrinsic::s390_vsldb: {
      // VECTOR SHIFT LEFT DOUBLE BY BYTE
      // The result is the concatenated sources shifted left by FirstIdx
      // bytes: the first NumSrc0Els result bytes come from operand 1
      // (starting at byte FirstIdx), the rest from operand 2.
      assert(VT == MVT::v16i8 && "Unexpected type.");
      unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
      unsigned NumSrc0Els = 16 - FirstIdx;
      SrcDemE = APInt(NumElts, 0);
      if (OpNo == 1) {
        APInt DemEls = DemandedElts.trunc(NumSrc0Els);
        SrcDemE.insertBits(DemEls, FirstIdx);
      } else {
        APInt DemEls = DemandedElts.lshr(NumSrc0Els);
        SrcDemE.insertBits(DemEls, 0);
      }
      break;
    }
    case Intrinsic::s390_vperm:
      // VECTOR PERMUTE selects bytes from both sources under control of a
      // variable mask operand, so any source element may contribute to any
      // result element: conservatively demand them all.  (APInt(NumElts, 1)
      // would demand only element 0, which is unsound here.)
      SrcDemE = APInt::getAllOnesValue(NumElts);
      break;
    default:
      llvm_unreachable("Unhandled intrinsic.");
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
      // Scalar operand.
      SrcDemE = APInt(1, 1);
      break;
    case SystemZISD::SELECT_CCMASK:
      // Both selected values have the same element layout as the result.
      SrcDemE = DemandedElts;
      break;
    default:
      llvm_unreachable("Unhandled opcode.");
      break;
    }
  }
  return SrcDemE;
}
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG, unsigned Depth,
unsigned OpNo) {
APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
Known.One = LHSKnown.One & RHSKnown.One;
}
void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
Known.resetAll();
switch (Op.getOpcode()) {
case SystemZISD::SELECT_CCMASK: {
KnownBits TrueKnown(BitWidth), FalseKnown(BitWidth);
DAG.computeKnownBits(Op.getOperand(0), TrueKnown, Depth + 1);
DAG.computeKnownBits(Op.getOperand(1), FalseKnown, Depth + 1);
Known.Zero = TrueKnown.Zero & FalseKnown.Zero;
Known.One = TrueKnown.One & FalseKnown.One;
break;
// Intrinsic CC result is returned in the two low bits.
unsigned tmp0, tmp1; // not used
if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
Known.Zero.setBitsFrom(2);
return;
}
EVT VT = Op.getValueType();
if (Op.getResNo() != 0 || VT == MVT::Untyped)
return;
assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
"KnownBits does not match VT in bitwidth");
assert ((!VT.isVector() ||
(DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
"DemandedElts does not match VT number of elements");
unsigned BitWidth = Known.getBitWidth();
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
bool IsLogical = false;
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (Id) {
case Intrinsic::s390_vpksh: // PACKS
case Intrinsic::s390_vpksf:
case Intrinsic::s390_vpksg:
case Intrinsic::s390_vpkshs: // PACKS_CC
case Intrinsic::s390_vpksfs:
case Intrinsic::s390_vpksgs:
case Intrinsic::s390_vpklsh: // PACKLS
case Intrinsic::s390_vpklsf:
case Intrinsic::s390_vpklsg:
case Intrinsic::s390_vpklshs: // PACKLS_CC
case Intrinsic::s390_vpklsfs:
case Intrinsic::s390_vpklsgs:
case Intrinsic::s390_vpdi:
case Intrinsic::s390_vsldb:
case Intrinsic::s390_vperm:
computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
break;
case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
case Intrinsic::s390_vuplhh:
case Intrinsic::s390_vuplhf:
case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
case Intrinsic::s390_vupllh:
case Intrinsic::s390_vupllf:
IsLogical = true;
LLVM_FALLTHROUGH;
case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
case Intrinsic::s390_vuphh:
case Intrinsic::s390_vuphf:
case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
case Intrinsic::s390_vuplhw:
case Intrinsic::s390_vuplf: {
SDValue SrcOp = Op.getOperand(1);
unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
Known = KnownBits(SrcBitWidth);
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
if (IsLogical) {
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(SrcBitWidth);
} else
Known = Known.sext(BitWidth);
break;
}
default:
break;
}
} else {
switch (Opcode) {
case SystemZISD::JOIN_DWORDS:
case SystemZISD::SELECT_CCMASK:
computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
break;
case SystemZISD::REPLICATE: {
SDValue SrcOp = Op.getOperand(0);
DAG.computeKnownBits(SrcOp, Known, Depth + 1);
if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
break;
}
default:
break;
}
}
default:
break;
// Known has the width of the source operand(s). Adjust if needed to match
// the passed bitwidth.
if (Known.getBitWidth() != BitWidth)
Known = Known.zextOrTrunc(BitWidth);
}
// Return the number of sign bits of Op derived from its two value operands
// (operands OpNo and OpNo + 1): the minimum over both sources, reduced by the
// element-width difference when the node narrows its sources (PACK).
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
                                        const SelectionDAG &DAG, unsigned Depth,
                                        unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  unsigned Src0SignBits =
      DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  if (Src0SignBits == 1)
    return 1; // Early out.
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  unsigned Src1SignBits =
      DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  if (Src1SignBits == 1)
    return 1; // Early out.
  unsigned Common = std::min(Src0SignBits, Src1SignBits);
  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
  unsigned VTBits = Op.getValueType().getScalarSizeInBits();
  if (SrcBitWidth == VTBits)
    return Common;
  // PACK: the top SrcBitWidth - VTBits bits of each source element are
  // dropped, taking that many sign bits with them.
  assert (SrcBitWidth > VTBits && "Expected operands of same bitwidth.");
  unsigned SrcExtraBits = SrcBitWidth - VTBits;
  return (Common > SrcExtraBits ? Common - SrcExtraBits : 1);
}
// Target hook: report the number of known sign bits in result 0 of a SystemZ
// node or vector intrinsic.  Returning 1 means "nothing known beyond the
// generic analysis".
unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
// Only the first result carries a data value (a second result, when
// present, is the CC value).
if (Op.getResNo() != 0)
return 1;
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (Id) {
// Two-input operations: take the minimum over both sources (with PACK
// narrowing handled inside computeNumSignBitsBinOp).
case Intrinsic::s390_vpksh: // PACKS
case Intrinsic::s390_vpksf:
case Intrinsic::s390_vpksg:
case Intrinsic::s390_vpkshs: // PACKS_CC
case Intrinsic::s390_vpksfs:
case Intrinsic::s390_vpksgs:
case Intrinsic::s390_vpklsh: // PACKLS
case Intrinsic::s390_vpklsf:
case Intrinsic::s390_vpklsg:
case Intrinsic::s390_vpklshs: // PACKLS_CC
case Intrinsic::s390_vpklsfs:
case Intrinsic::s390_vpklsgs:
case Intrinsic::s390_vpdi:
case Intrinsic::s390_vsldb:
case Intrinsic::s390_vperm:
return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
// Sign-extending unpacks: every bit gained over the source element width
// is a copy of the source sign bit.  (The logical unpacks are not listed
// here; nothing is claimed for them.)
case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
case Intrinsic::s390_vuphh:
case Intrinsic::s390_vuphf:
case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
case Intrinsic::s390_vuplhw:
case Intrinsic::s390_vuplf: {
SDValue PackedOp = Op.getOperand(1);
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
EVT VT = Op.getValueType();
unsigned VTBits = VT.getScalarSizeInBits();
// Add one sign bit per bit of widening.
Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
return Tmp;
}
default:
break;
}
} else {
switch (Opcode) {
case SystemZISD::SELECT_CCMASK:
// Either source may be selected, so take the minimum over both.
return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
default:
break;
}
}
return 1;
}
//===----------------------------------------------------------------------===//

View File

@ -499,6 +499,12 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
ISD::NodeType getExtendForAtomicOps() const override {
return ISD::ANY_EXTEND;
}

View File

@ -0,0 +1,460 @@
; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with
; vector intrinsics.
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
; PACKS_CC (operand elements are 0): i64 -> i32
define <4 x i32> @f0() {
; CHECK-LABEL: f0:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
%extr = extractvalue {<4 x i32>, i32} %call, 0
%and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKS_CC (operand elements are 1): i64 -> i32
; NOTE: The vector AND is optimized away, but vrepig+vpksgs is used instead
; of vrepif. Similarly for more test cases below.
define <4 x i32> @f1() {
; CHECK-LABEL: f1:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepig %v0, 1
; CHECK-NEXT: vpksgs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
%extr = extractvalue {<4 x i32>, i32} %call, 0
%and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKS_CC (operand elements are 0): i32 -> i16
define <8 x i16> @f2() {
; CHECK-LABEL: f2:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
<4 x i32> <i32 0, i32 0, i32 0, i32 0>)
%extr = extractvalue {<8 x i16>, i32} %call, 0
%and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKS_CC (operand elements are 1): i32 -> i16
define <8 x i16> @f3() {
; CHECK-LABEL: f3:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepif %v0, 1
; CHECK-NEXT: vpksfs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
<4 x i32> <i32 1, i32 1, i32 1, i32 1>)
%extr = extractvalue {<8 x i16>, i32} %call, 0
%and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKS_CC (operand elements are 0): i16 -> i8
define <16 x i8> @f4() {
; CHECK-LABEL: f4:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
%extr = extractvalue {<16 x i8>, i32} %call, 0
%and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
; PACKS_CC (operand elements are 1): i16 -> i8
define <16 x i8> @f5() {
; CHECK-LABEL: f5:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepih %v0, 1
; CHECK-NEXT: vpkshs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
%extr = extractvalue {<16 x i8>, i32} %call, 0
%and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
; PACKLS_CC (operand elements are 0): i64 -> i32
define <4 x i32> @f6() {
; CHECK-LABEL: f6:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
%extr = extractvalue {<4 x i32>, i32} %call, 0
%and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKLS_CC (operand elements are 1): i64 -> i32
define <4 x i32> @f7() {
; CHECK-LABEL: f7:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepig %v0, 1
; CHECK-NEXT: vpklsgs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
%extr = extractvalue {<4 x i32>, i32} %call, 0
%and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKLS_CC (operand elements are 0): i32 -> i16
define <8 x i16> @f8() {
; CHECK-LABEL: f8:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
<4 x i32> <i32 0, i32 0, i32 0, i32 0>)
%extr = extractvalue {<8 x i16>, i32} %call, 0
%and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKLS_CC (operand elements are 1): i32 -> i16
define <8 x i16> @f9() {
; CHECK-LABEL: f9:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepif %v0, 1
; CHECK-NEXT: vpklsfs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
<4 x i32> <i32 1, i32 1, i32 1, i32 1>)
%extr = extractvalue {<8 x i16>, i32} %call, 0
%and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKLS_CC (operand elements are 0): i16 -> i8
define <16 x i8> @f10() {
; CHECK-LABEL: f10:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
%extr = extractvalue {<16 x i8>, i32} %call, 0
%and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
; PACKLS_CC (operand elements are 1): i16 -> i8
define <16 x i8> @f11() {
; CHECK-LABEL: f11:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepih %v0, 1
; CHECK-NEXT: vpklshs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
%extr = extractvalue {<16 x i8>, i32} %call, 0
%and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
; PACKS (operand elements are 0): i64 -> i32
define <4 x i32> @f12() {
; CHECK-LABEL: f12:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
%and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKS (operand elements are 1): i64 -> i32
define <4 x i32> @f13() {
; CHECK-LABEL: f13:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepig %v0, 1
; CHECK-NEXT: vpksg %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
%and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKS (operand elements are 0): i32 -> i16
define <8 x i16> @f14() {
; CHECK-LABEL: f14:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
<4 x i32> <i32 0, i32 0, i32 0, i32 0>)
%and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKS (operand elements are 1): i32 -> i16
define <8 x i16> @f15() {
; CHECK-LABEL: f15:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepif %v0, 1
; CHECK-NEXT: vpksf %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
<4 x i32> <i32 1, i32 1, i32 1, i32 1>)
%and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKS (operand elements are 0): i16 -> i8
define <16 x i8> @f16() {
; CHECK-LABEL: f16:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call <16 x i8> @llvm.s390.vpksh(
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
%and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
; PACKS (operand elements are 1): i16 -> i8
define <16 x i8> @f17() {
; CHECK-LABEL: f17:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepih %v0, 1
; CHECK-NEXT: vpksh %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <16 x i8> @llvm.s390.vpksh(
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
%and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
; PACKLS (operand elements are 0): i64 -> i32
define <4 x i32> @f18() {
; CHECK-LABEL: f18:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
%and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKLS (operand elements are 1): i64 -> i32
define <4 x i32> @f19() {
; CHECK-LABEL: f19:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepig %v0, 1
; CHECK-NEXT: vpklsg %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
%and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; PACKLS (operand elements are 0): i32 -> i16
define <8 x i16> @f20() {
; CHECK-LABEL: f20:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
<4 x i32> <i32 0, i32 0, i32 0, i32 0>)
%and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKLS (operand elements are 1): i32 -> i16
define <8 x i16> @f21() {
; CHECK-LABEL: f21:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepif %v0, 1
; CHECK-NEXT: vpklsf %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
<4 x i32> <i32 1, i32 1, i32 1, i32 1>)
%and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; PACKLS (operand elements are 0): i16 -> i8
define <16 x i8> @f22() {
; CHECK-LABEL: f22:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%call = call <16 x i8> @llvm.s390.vpklsh(
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
%and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
; PACKLS (operand elements are 1): i16 -> i8
define <16 x i8> @f23() {
; CHECK-LABEL: f23:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepih %v0, 1
; CHECK-NEXT: vpklsh %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <16 x i8> @llvm.s390.vpklsh(
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
%and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %and
}
declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
; VPDI (operand elements are 0):
define <2 x i64> @f24() {
; CHECK-LABEL: f24:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 0>,
<2 x i64> <i64 0, i64 0>, i32 0)
%res = and <2 x i64> %perm, <i64 1, i64 1>
ret <2 x i64> %res
}
; VPDI (operand elements are 1):
define <2 x i64> @f25() {
; CHECK-LABEL: f25:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepig %v0, 1
; CHECK-NEXT: vpdi %v24, %v0, %v0, 0
; CHECK-NEXT: br %r14
%perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 1, i64 1>,
<2 x i64> <i64 1, i64 1>, i32 0)
%res = and <2 x i64> %perm, <i64 1, i64 1>
ret <2 x i64> %res
}
declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
; VSLDB (operand elements are 0):
define <16 x i8> @f26() {
; CHECK-LABEL: f26:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
i32 1)
%res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %res
}
; VSLDB (operand elements are 1):
define <16 x i8> @f27() {
; CHECK-LABEL: f27:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vrepib %v0, 1
; CHECK-NEXT: vsldb %v24, %v0, %v0, 1
; CHECK-NEXT: br %r14
%shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
i32 1)
%res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %res
}
; Test that intrinsic CC result is recognized.
define i32 @f28(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: f28:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: lhi %r2, 0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
%cc = extractvalue {<8 x i16>, i32} %call, 1
%res = and i32 %cc, -4
ret i32 %res
}
declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
; Test VPERM (operand elements are 0):
define <16 x i8> @f29() {
; CHECK-LABEL: f29:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%perm = call <16 x i8> @llvm.s390.vperm(
<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %res
}
; Test VPERM (operand elements are 1):
define <16 x i8> @f30() {
; CHECK-LABEL: f30:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v0, 0
; CHECK-NEXT: vrepib %v1, 1
; CHECK-NEXT: vperm %v24, %v1, %v1, %v0
; CHECK-NEXT: br %r14
%perm = call <16 x i8> @llvm.s390.vperm(
<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %res
}

View File

@ -0,0 +1,384 @@
; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with
; vector intrinsics.
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>)
; VUPHB (used operand elements are 0)
; The high-half unpack only reads the first eight bytes; with those all
; zero, KnownBits lets the AND with 1 fold to an all-zeros vgbm.
define <8 x i16> @f0() {
; CHECK-LABEL: f0:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; VUPHB (used operand elements are 1)
; NOTE: The AND is optimized away, but instead of replicating '1' into <8 x
; i16>, the original vector constant is put in the constant pool and then
; unpacked (repeated in more test cases below).
define <8 x i16> @f1() {
; CHECK-LABEL: f1:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuphb %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; VUPLHB (used operand elements are 0)
define <8 x i16> @f2() {
; CHECK-LABEL: f2:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8>
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; VUPLHB (used operand elements are 1)
define <8 x i16> @f3() {
; CHECK-LABEL: f3:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuplhb %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8>
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>)
; VUPHH (used operand elements are 0)
; Halfword variants of the byte tests above: the unpack reads the first
; four i16 elements, so known-zero bits fold the AND away.
define <4 x i32> @f4() {
; CHECK-LABEL: f4:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
<i16 0, i16 0, i16 0, i16 0,
i16 1, i16 1, i16 1, i16 1>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; VUPHH (used operand elements are 1)
define <4 x i32> @f5() {
; CHECK-LABEL: f5:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuphh %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
<i16 1, i16 1, i16 1, i16 1,
i16 0, i16 0, i16 0, i16 0>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; VUPLHH (used operand elements are 0)
define <4 x i32> @f6() {
; CHECK-LABEL: f6:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16>
<i16 0, i16 0, i16 0, i16 0,
i16 1, i16 1, i16 1, i16 1>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; VUPLHH (used operand elements are 1)
define <4 x i32> @f7() {
; CHECK-LABEL: f7:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuplhh %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16>
<i16 1, i16 1, i16 1, i16 1,
i16 0, i16 0, i16 0, i16 0>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>)
; VUPHF (used operand elements are 0)
; Word variants: the first two i32 elements feed the unpack.
define <2 x i64> @f8() {
; CHECK-LABEL: f8:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; VUPHF (used operand elements are 1)
define <2 x i64> @f9() {
; CHECK-LABEL: f9:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuphf %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; VUPLHF (used operand elements are 0)
define <2 x i64> @f10() {
; CHECK-LABEL: f10:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; VUPLHF (used operand elements are 1)
define <2 x i64> @f11() {
; CHECK-LABEL: f11:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuplhf %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
declare <8 x i16> @llvm.s390.vupllb(<16 x i8>)
; VUPLB (used operand elements are 0)
; The low-half unpacks (VUPL*/VUPLL*) read the second half of the input
; vector, so the constant halves are swapped relative to the VUPH* tests.
define <8 x i16> @f12() {
; CHECK-LABEL: f12:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; VUPLB (used operand elements are 1)
define <8 x i16> @f13() {
; CHECK-LABEL: f13:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuplb %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; VUPLLB (used operand elements are 0)
define <8 x i16> @f14() {
; CHECK-LABEL: f14:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8>
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
; VUPLLB (used operand elements are 1)
define <8 x i16> @f15() {
; CHECK-LABEL: f15:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vupllb %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8>
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
%and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %and
}
declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
declare <4 x i32> @llvm.s390.vupllh(<8 x i16>)
; VUPLHW (used operand elements are 0)
; Low-half halfword unpacks (signed VUPLHW, logical VUPLLH).
define <4 x i32> @f16() {
; CHECK-LABEL: f16:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
<i16 1, i16 1, i16 1, i16 1,
i16 0, i16 0, i16 0, i16 0>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; VUPLHW (used operand elements are 1)
define <4 x i32> @f17() {
; CHECK-LABEL: f17:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuplhw %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
<i16 0, i16 0, i16 0, i16 0,
i16 1, i16 1, i16 1, i16 1>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; VUPLLH (used operand elements are 0)
define <4 x i32> @f18() {
; CHECK-LABEL: f18:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16>
<i16 1, i16 1, i16 1, i16 1,
i16 0, i16 0, i16 0, i16 0>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
; VUPLLH (used operand elements are 1)
define <4 x i32> @f19() {
; CHECK-LABEL: f19:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vupllh %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16>
<i16 0, i16 0, i16 0, i16 0,
i16 1, i16 1, i16 1, i16 1>)
%and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %and
}
declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
declare <2 x i64> @llvm.s390.vupllf(<4 x i32>)
; VUPLF (used operand elements are 0)
; Low-half word unpacks (signed VUPLF, logical VUPLLF).
define <2 x i64> @f20() {
; CHECK-LABEL: f20:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; VUPLF (used operand elements are 1)
define <2 x i64> @f21() {
; CHECK-LABEL: f21:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vuplf %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; VUPLLF (used operand elements are 0)
define <2 x i64> @f22() {
; CHECK-LABEL: f22:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; VUPLLF (used operand elements are 1)
define <2 x i64> @f23() {
; CHECK-LABEL: f23:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI
; CHECK-NEXT: vl %v0, 0(%r1)
; CHECK-NEXT: vupllf %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
%and = and <2 x i64> %unp, <i64 1, i64 1>
ret <2 x i64> %and
}
; The next three tests mask with 0xffffffff00000000 so that only the
; extension (high) half of each unpacked i64 element is observed.
; Test that signed unpacking of positive elements gives known zeros in high part.
define <2 x i64> @f24() {
; CHECK-LABEL: f24:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
i64 -4294967296>
ret <2 x i64> %and
}
; Test that signed unpacking of negative elements gives known ones in high part.
define <2 x i64> @f25() {
; CHECK-LABEL: f25:
; CHECK-LABEL: # %bb.0:
; 61680 = 0xf0f0
; CHECK-NEXT: vgbm %v24, 61680
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
i64 -4294967296>
ret <2 x i64> %and
}
; Test that logical unpacking of negative elements gives known zeros in high part.
define <2 x i64> @f26() {
; CHECK-LABEL: f26:
; CHECK-LABEL: # %bb.0:
; CHECK-NEXT: vgbm %v24, 0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
%and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
i64 -4294967296>
ret <2 x i64> %and
}

View File

@ -0,0 +1,51 @@
; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode().
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
; SystemZISD::REPLICATE
; The extracted lane of the zero-extended compare is already 0 or 1, so the
; scalar re-test below must not be re-materialized (hence the -NOT lines).
define i32 @f0() {
; CHECK-LABEL: f0:
; CHECK-LABEL: # %bb.0:
; CHECK: vlgvf
; CHECK-NOT: lhi %r2, 0
; CHECK-NOT: chi %r0, 0
; CHECK-NOT: lochilh %r2, 1
; CHECK: br %r14
%cmp0 = icmp ne <4 x i32> undef, zeroinitializer
%zxt0 = zext <4 x i1> %cmp0 to <4 x i32>
%ext0 = extractelement <4 x i32> %zxt0, i32 3
br label %exit
exit:
; The vector icmp+zext involves a REPLICATE of 1's. If KnownBits reflects
; this, DAGCombiner can see that the i32 icmp and zext here are not needed.
%cmp1 = icmp ne i32 %ext0, 0
%zxt1 = zext i1 %cmp1 to i32
ret i32 %zxt1
}
; SystemZISD::JOIN_DWORDS (and REPLICATE)
define void @f1() {
; The DAG XOR has JOIN_DWORDS and REPLICATE operands. With KnownBits properly set
; for both these nodes, ICMP is used instead of TM during lowering because
; adjustForRedundantAnd() succeeds.
; CHECK-LABEL: f1:
; CHECK-LABEL: # %bb.0:
; CHECK-NOT: tmll
; CHECK-NOT: jne
; CHECK: cijlh
%1 = load i16, i16* null, align 2
%2 = icmp eq i16 %1, 0
; Build a <2 x i1>, invert it, and branch on lane 0 (the negated compare).
%3 = insertelement <2 x i1> undef, i1 %2, i32 0
%4 = insertelement <2 x i1> %3, i1 true, i32 1
%5 = xor <2 x i1> %4, <i1 true, i1 true>
%6 = extractelement <2 x i1> %5, i32 0
%7 = or i1 %6, undef
br i1 %7, label %9, label %8
; <label>:8: ; preds = %0
unreachable
; <label>:9: ; preds = %0
unreachable
}

View File

@ -0,0 +1,236 @@
; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with
; vector intrinsics.
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
; PACKS_CC: i64 -> i32
; With these small non-negative inputs the packed elements have enough
; known sign bits that the trunc+sext pair below is a no-op and only the
; pack instruction remains.
define <4 x i32> @f0() {
; CHECK-LABEL: f0:
; CHECK-LABEL: # %bb.0:
; CHECK: vpksgs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>)
%extr = extractvalue {<4 x i32>, i32} %call, 0
%trunc = trunc <4 x i32> %extr to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; PACKS_CC: i32 -> i16
define <8 x i16> @f1() {
; CHECK-LABEL: f1:
; CHECK-LABEL: # %bb.0:
; CHECK: vpksfs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
<4 x i32> <i32 0, i32 1, i32 1, i32 0>)
%extr = extractvalue {<8 x i16>, i32} %call, 0
%trunc = trunc <8 x i16> %extr to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; PACKS_CC: i16 -> i8
define <16 x i8> @f2() {
; CHECK-LABEL: f2:
; CHECK-LABEL: # %bb.0:
; CHECK: vpkshs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
%extr = extractvalue {<16 x i8>, i32} %call, 0
%trunc = trunc <16 x i8> %extr to <16 x i4>
%ret = sext <16 x i4> %trunc to <16 x i8>
ret <16 x i8> %ret
}
declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
; PACKLS_CC: i64 -> i32
; Logical packs with CC result; same trunc+sext elimination as for the
; signed packs above.
define <4 x i32> @f3() {
; CHECK-LABEL: f3:
; CHECK-LABEL: # %bb.0:
; CHECK: vpklsgs %v24, %v1, %v0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
%extr = extractvalue {<4 x i32>, i32} %call, 0
%trunc = trunc <4 x i32> %extr to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; PACKLS_CC: i32 -> i16
define <8 x i16> @f4() {
; CHECK-LABEL: f4:
; CHECK-LABEL: # %bb.0:
; CHECK: vpklsfs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
<4 x i32> <i32 0, i32 1, i32 1, i32 0>)
%extr = extractvalue {<8 x i16>, i32} %call, 0
%trunc = trunc <8 x i16> %extr to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; PACKLS_CC: i16 -> i8
define <16 x i8> @f5() {
; CHECK-LABEL: f5:
; CHECK-LABEL: # %bb.0:
; CHECK: vpklshs %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
%extr = extractvalue {<16 x i8>, i32} %call, 0
%trunc = trunc <16 x i8> %extr to <16 x i4>
%ret = sext <16 x i4> %trunc to <16 x i8>
ret <16 x i8> %ret
}
declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
; PACKS: i64 -> i32
; Plain signed packs (no CC result).
define <4 x i32> @f6() {
; CHECK-LABEL: f6:
; CHECK-LABEL: # %bb.0:
; CHECK: vpksg %v24, %v1, %v0
; CHECK-NEXT: br %r14
%call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
%trunc = trunc <4 x i32> %call to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; PACKS: i32 -> i16
define <8 x i16> @f7() {
; CHECK-LABEL: f7:
; CHECK-LABEL: # %bb.0:
; CHECK: vpksf %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
<4 x i32> <i32 0, i32 1, i32 1, i32 0>)
%trunc = trunc <8 x i16> %call to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; PACKS: i16 -> i8
define <16 x i8> @f8() {
; CHECK-LABEL: f8:
; CHECK-LABEL: # %bb.0:
; CHECK: vpksh %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <16 x i8> @llvm.s390.vpksh(
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
%trunc = trunc <16 x i8> %call to <16 x i4>
%ret = sext <16 x i4> %trunc to <16 x i8>
ret <16 x i8> %ret
}
declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
; PACKLS: i64 -> i32
; Plain logical packs (no CC result).
define <4 x i32> @f9() {
; CHECK-LABEL: f9:
; CHECK-LABEL: # %bb.0:
; CHECK: vpklsg %v24, %v1, %v0
; CHECK-NEXT: br %r14
%call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
%trunc = trunc <4 x i32> %call to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
; PACKLS: i32 -> i16
define <8 x i16> @f10() {
; CHECK-LABEL: f10:
; CHECK-LABEL: # %bb.0:
; CHECK: vpklsf %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
<4 x i32> <i32 0, i32 1, i32 1, i32 0>)
%trunc = trunc <8 x i16> %call to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
; PACKLS: i16 -> i8
define <16 x i8> @f11() {
; CHECK-LABEL: f11:
; CHECK-LABEL: # %bb.0:
; CHECK: vpklsh %v24, %v0, %v0
; CHECK-NEXT: br %r14
%call = call <16 x i8> @llvm.s390.vpklsh(
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
<8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
%trunc = trunc <16 x i8> %call to <16 x i4>
%ret = sext <16 x i4> %trunc to <16 x i8>
ret <16 x i8> %ret
}
declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
; VPDI:
; Doubleword permute of small constants: both result elements keep enough
; sign bits that the trunc+sext pair folds away.
define <2 x i64> @f12() {
; CHECK-LABEL: f12:
; CHECK-LABEL: # %bb.0:
; CHECK: vpdi %v24, %v1, %v0, 0
; CHECK-NEXT: br %r14
%perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>,
<2 x i64> <i64 1, i64 0>, i32 0)
%trunc = trunc <2 x i64> %perm to <2 x i32>
%ret = sext <2 x i32> %trunc to <2 x i64>
ret <2 x i64> %ret
}
declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
; VSLDB:
; Shift-left-double by byte: every input byte is 0 or 1, so each result
; byte has at least 4 sign bits and the trunc-to-i4 + sext is a no-op.
define <16 x i8> @f13() {
; CHECK-LABEL: f13:
; CHECK-LABEL: # %bb.0:
; CHECK: vsldb %v24, %v0, %v0, 1
; CHECK-NEXT: br %r14
%shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
<i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8>
<i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
i32 1)
%trunc = trunc <16 x i8> %shfd to <16 x i4>
%ret = sext <16 x i4> %trunc to <16 x i8>
ret <16 x i8> %ret
}
declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
; Test VPERM:
; All selectable bytes are 0 or 1, so the permuted result has enough sign
; bits for the trunc-to-i4 + sext to be eliminated.
define <16 x i8> @f14() {
; CHECK-LABEL: f14:
; CHECK-LABEL: # %bb.0:
; CHECK: vperm %v24, %v0, %v0, %v0
; CHECK-NEXT: br %r14
%perm = call <16 x i8> @llvm.s390.vperm(
<16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
<16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
<16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>)
%trunc = trunc <16 x i8> %perm to <16 x i4>
%ret = sext <16 x i4> %trunc to <16 x i8>
ret <16 x i8> %ret
}

View File

@ -0,0 +1,97 @@
; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with
; vector intrinsics.
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
; VUPHB
; Each input element is 0 or 1, so the sign-extended unpack result has many
; known sign bits and the trunc+sext pair below folds away.
define <8 x i16> @f0() {
; CHECK-LABEL: f0:
; CHECK-LABEL: # %bb.0:
; CHECK: vuphb %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
<i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
%trunc = trunc <8 x i16> %unp to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
; VUPHH
define <4 x i32> @f1() {
; CHECK-LABEL: f1:
; CHECK-LABEL: # %bb.0:
; CHECK: vuphh %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
<i16 0, i16 1, i16 0, i16 1,
i16 0, i16 1, i16 0, i16 1>)
%trunc = trunc <4 x i32> %unp to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
; VUPHF
define <2 x i64> @f2() {
; CHECK-LABEL: f2:
; CHECK-LABEL: # %bb.0:
; CHECK: vuphf %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 1, i32 0, i32 1>)
%trunc = trunc <2 x i64> %unp to <2 x i32>
%ret = sext <2 x i32> %trunc to <2 x i64>
ret <2 x i64> %ret
}
declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
; VUPLB
; Low-half unpack variants of the sign-bits tests above.
define <8 x i16> @f3() {
; CHECK-LABEL: f3:
; CHECK-LABEL: # %bb.0:
; CHECK: vuplb %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
<i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
%trunc = trunc <8 x i16> %unp to <8 x i8>
%ret = sext <8 x i8> %trunc to <8 x i16>
ret <8 x i16> %ret
}
declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
; VUPLHW
define <4 x i32> @f4() {
; CHECK-LABEL: f4:
; CHECK-LABEL: # %bb.0:
; CHECK: vuplhw %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
<i16 1, i16 0, i16 1, i16 0,
i16 1, i16 0, i16 1, i16 0>)
%trunc = trunc <4 x i32> %unp to <4 x i16>
%ret = sext <4 x i16> %trunc to <4 x i32>
ret <4 x i32> %ret
}
declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
; VUPLF
define <2 x i64> @f5() {
; CHECK-LABEL: f5:
; CHECK-LABEL: # %bb.0:
; CHECK: vuplf %v24, %v0
; CHECK-NEXT: br %r14
%unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 0, i32 1, i32 0>)
%trunc = trunc <2 x i64> %unp to <2 x i32>
%ret = sext <2 x i32> %trunc to <2 x i64>
ret <2 x i64> %ret
}

View File

@ -0,0 +1,36 @@
; Test that ComputeNumSignBitsForTargetNode() (SELECT_CCMASK) will help
; DAGCombiner so that it knows that %sel0 is already sign extended.
;
; The patterns below match llc's -debug-only output ("MachineFunction at
; end of ISel"), which is only produced by builds with assertions enabled,
; so restrict the test accordingly.
; REQUIRES: asserts
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -debug-only=isel < %s 2>&1 | FileCheck %s
%0 = type <{ %1*, i16, [6 x i8] }>
%1 = type { i32 (...)** }
define signext i16 @fun(%0* %Arg0, i16 signext %Arg1) {
entry:
br i1 undef, label %lab0, label %lab1
lab0:
%icmp0 = icmp eq i32 undef, 0
; Both select operands (%Arg1 is a signext parameter, 1 is a constant)
; should already be sign extended, so widening %phi0 below should not
; need a separate LHR.
%sel0 = select i1 %icmp0, i16 %Arg1, i16 1
br label %lab1
lab1:
; CHECK: *** MachineFunction at end of ISel ***
; CHECK-LABEL: bb.2.lab1:
; CHECK-NOT: LHR
; CHECK: BRC
%phi0 = phi i16 [ 2, %entry ], [ %sel0, %lab0 ]
%sext0 = sext i16 %phi0 to i32
br i1 undef, label %lab2, label %lab3
lab2:
%and0 = and i32 %sext0, 8
%icmp1 = icmp eq i32 %and0, 0
%sel1 = select i1 %icmp1, i16 %phi0, i16 4
ret i16 %sel1
lab3:
ret i16 8
}

View File

@ -17,8 +17,7 @@ define void @pr32275(<4 x i8> %B15) {
; CHECK-NEXT: vlvgf [[REG2]], [[REG3]], 2
; CHECK-NEXT: vn [[REG2]], [[REG2]], [[REG0]]
; CHECK-NEXT: vlgvf [[REG4:%r[0-9]]], [[REG2]], 3
; CHECK-NEXT: tmll [[REG4]], 1
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: cijlh [[REG4]], 0, .LBB0_1
; CHECK-NEXT: # %bb.2: # %CF36
; CHECK-NEXT: br %r14
BB: