mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[PowerPC] Implemented Vector Load with Zero and Signed Extend Builtins
This patch implements the Vector Load with Zero and Signed Extend builtins (lxvr_x for b, h, w, d), and adds the appropriate test cases for these builtins. The builtins utilize the vector load instructions introduced with ISA 3.1. Differential Revision: https://reviews.llvm.org/D82502#inline-797941
This commit is contained in:
parent
34d811b345
commit
c16246d9bf
@ -1527,6 +1527,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
return "PPCISD::STRICT_FCFIDS";
|
||||
case PPCISD::STRICT_FCFIDUS:
|
||||
return "PPCISD::STRICT_FCFIDUS";
|
||||
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -13639,6 +13640,46 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Look for the pattern of a load from a narrow width to i128, feeding
|
||||
// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
|
||||
// (LXVRZX). This node represents a zero extending load that will be matched
|
||||
// to the Load VSX Vector Rightmost instructions.
|
||||
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
|
||||
SDLoc DL(N);
|
||||
|
||||
// This combine is only eligible for a BUILD_VECTOR of v1i128.
|
||||
if (N->getValueType(0) != MVT::v1i128)
|
||||
return SDValue();
|
||||
|
||||
SDValue Operand = N->getOperand(0);
|
||||
// Proceed with the transformation if the operand to the BUILD_VECTOR
|
||||
// is a load instruction.
|
||||
if (Operand.getOpcode() != ISD::LOAD)
|
||||
return SDValue();
|
||||
|
||||
LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
|
||||
EVT MemoryType = LD->getMemoryVT();
|
||||
|
||||
// This transformation is only valid if the we are loading either a byte,
|
||||
// halfword, word, or doubleword.
|
||||
bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
|
||||
MemoryType == MVT::i32 || MemoryType == MVT::i64;
|
||||
|
||||
// Ensure that the load from the narrow width is being zero extended to i128.
|
||||
if (!ValidLDType ||
|
||||
(LD->getExtensionType() != ISD::ZEXTLOAD &&
|
||||
LD->getExtensionType() != ISD::EXTLOAD))
|
||||
return SDValue();
|
||||
|
||||
SDValue LoadOps[] = {
|
||||
LD->getChain(), LD->getBasePtr(),
|
||||
DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
|
||||
|
||||
return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
|
||||
DAG.getVTList(MVT::v1i128, MVT::Other),
|
||||
LoadOps, MemoryType, LD->getMemOperand());
|
||||
}
|
||||
|
||||
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
||||
@ -13676,6 +13717,14 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
|
||||
return Reduced;
|
||||
}
|
||||
|
||||
// On Power10, the Load VSX Vector Rightmost instructions can be utilized
|
||||
// if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
|
||||
// is a load from <valid narrow width> to i128.
|
||||
if (Subtarget.isISA3_1()) {
|
||||
SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
|
||||
if (BVOfZLoad)
|
||||
return BVOfZLoad;
|
||||
}
|
||||
|
||||
if (N->getValueType(0) != MVT::v2f64)
|
||||
return SDValue();
|
||||
|
@ -494,6 +494,12 @@ namespace llvm {
|
||||
/// an xxswapd.
|
||||
LXVD2X,
|
||||
|
||||
/// LXVRZX - Load VSX Vector Rightmost and Zero Extend
|
||||
/// This node represents v1i128 BUILD_VECTOR of a zero extending load
|
||||
/// instruction from <byte, halfword, word, or doubleword> to i128.
|
||||
/// Allows utilization of the Load VSX Vector Rightmost Instructions.
|
||||
LXVRZX,
|
||||
|
||||
/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
|
||||
/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
|
||||
/// the vector type to load vector in big-endian element order.
|
||||
|
@ -18,6 +18,15 @@ def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
|
||||
// address computations).
|
||||
class isPCRel { bit PCRel = 1; }
|
||||
|
||||
// PowerPC specific type constraints.
|
||||
// Type profile for the PPCISD::LXVRZX node: one result constrained to
// v1i128 and two operands, both constrained to the pointer type.
def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
|
||||
]>;
|
||||
|
||||
// PPC Specific DAG Nodes.
|
||||
// Selection-DAG node for PPCISD::LXVRZX, typed by SDT_PPCLXVRZX. It carries a
// chain and is flagged as a node that may load from memory.
def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
|
||||
// Top-level class for prefixed instructions.
|
||||
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin> : Instruction {
|
||||
@ -1326,6 +1335,15 @@ let Predicates = [IsISA3_1] in {
|
||||
(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
|
||||
def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
|
||||
(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
|
||||
|
||||
// Select PPClxvrzx by its constant second operand (the memory width in bits
// supplied by combineBVZEXTLOAD): 8 -> LXVRBX, 16 -> LXVRHX, 32 -> LXVRWX,
// 64 -> LXVRDX. Each load result is copied to the VRRC register class to
// produce the v1i128 value.
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
|
||||
(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
|
||||
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
|
||||
(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
|
||||
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
|
||||
(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
|
||||
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
|
||||
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 400, Predicates = [IsISA3_1] in {
|
||||
|
@ -239,3 +239,181 @@ entry:
|
||||
store i64 %conv, i64* %add.ptr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Zero-extending i8 load inserted into a <1 x i128>: the CHECK lines expect a
; single lxvrbx (plus an xxlor register copy under the CHECK-O0 prefix).
define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
|
||||
; CHECK-LABEL: vec_xl_zext:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvrbx v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_zext:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: lxvrbx vs0, r4, r3
|
||||
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
|
||||
%0 = load i8, i8* %add.ptr, align 1
|
||||
%conv = zext i8 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Zero-extending i16 load inserted into a <1 x i128>: the CHECK lines expect
; the index to be scaled with sldi by 1 and the load to be a single lxvrhx.
define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
|
||||
; CHECK-LABEL: vec_xl_zext_short:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r3, 1
|
||||
; CHECK-NEXT: lxvrhx v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_zext_short:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: sldi r3, r3, 1
|
||||
; CHECK-O0-NEXT: lxvrhx vs0, r4, r3
|
||||
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
|
||||
%0 = load i16, i16* %add.ptr, align 2
|
||||
%conv = zext i16 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Zero-extending i32 load inserted into a <1 x i128>: the CHECK lines expect
; the index to be scaled with sldi by 2 and the load to be a single lxvrwx.
define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
|
||||
; CHECK-LABEL: vec_xl_zext_word:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r3, 2
|
||||
; CHECK-NEXT: lxvrwx v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_zext_word:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: sldi r3, r3, 2
|
||||
; CHECK-O0-NEXT: lxvrwx vs0, r4, r3
|
||||
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
|
||||
%0 = load i32, i32* %add.ptr, align 4
|
||||
%conv = zext i32 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Zero-extending i64 load inserted into a <1 x i128>: the CHECK lines expect
; the index to be scaled with sldi by 3 and the load to be a single lxvrdx.
define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
|
||||
; CHECK-LABEL: vec_xl_zext_dw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r3, 3
|
||||
; CHECK-NEXT: lxvrdx v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_zext_dw:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: sldi r3, r3, 3
|
||||
; CHECK-O0-NEXT: lxvrdx vs0, r4, r3
|
||||
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
|
||||
%0 = load i64, i64* %add.ptr, align 8
|
||||
%conv = zext i64 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Sign-extending i8 load inserted into a <1 x i128>: the CHECK lines expect no
; lxvr*x here, but a GPR sequence of lbzx, extsb, sradi by 63 and mtvsrdd.
define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
|
||||
; CHECK-LABEL: vec_xl_sext_b:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lbzx r3, r4, r3
|
||||
; CHECK-NEXT: extsb r3, r3
|
||||
; CHECK-NEXT: sradi r4, r3, 63
|
||||
; CHECK-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_sext_b:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: lbzx r3, r4, r3
|
||||
; CHECK-O0-NEXT: extsb r3, r3
|
||||
; CHECK-O0-NEXT: sradi r4, r3, 63
|
||||
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset
|
||||
%0 = load i8, i8* %add.ptr, align 1
|
||||
%conv = sext i8 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Sign-extending i16 load inserted into a <1 x i128>: the CHECK lines expect no
; lxvr*x here, but a GPR sequence of sldi, lhax, sradi by 63 and mtvsrdd.
define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
|
||||
; CHECK-LABEL: vec_xl_sext_h:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r3, 1
|
||||
; CHECK-NEXT: lhax r3, r4, r3
|
||||
; CHECK-NEXT: sradi r4, r3, 63
|
||||
; CHECK-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_sext_h:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: sldi r3, r3, 1
|
||||
; CHECK-O0-NEXT: lhax r3, r4, r3
|
||||
; CHECK-O0-NEXT: sradi r4, r3, 63
|
||||
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i16, i16* %p, i64 %offset
|
||||
%0 = load i16, i16* %add.ptr, align 2
|
||||
%conv = sext i16 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Sign-extending i32 load inserted into a <1 x i128>: the CHECK lines expect no
; lxvr*x here, but a GPR sequence of sldi, lwax, sradi by 63 and mtvsrdd.
define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
|
||||
; CHECK-LABEL: vec_xl_sext_w:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r3, 2
|
||||
; CHECK-NEXT: lwax r3, r4, r3
|
||||
; CHECK-NEXT: sradi r4, r3, 63
|
||||
; CHECK-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_sext_w:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: sldi r3, r3, 2
|
||||
; CHECK-O0-NEXT: lwax r3, r4, r3
|
||||
; CHECK-O0-NEXT: sradi r4, r3, 63
|
||||
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset
|
||||
%0 = load i32, i32* %add.ptr, align 4
|
||||
%conv = sext i32 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
||||
; Sign-extending i64 load inserted into a <1 x i128>: the CHECK lines expect no
; lxvr*x here, but a GPR sequence of sldi, ldx, sradi by 63 and mtvsrdd.
define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
|
||||
; CHECK-LABEL: vec_xl_sext_d:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r3, 3
|
||||
; CHECK-NEXT: ldx r3, r4, r3
|
||||
; CHECK-NEXT: sradi r4, r3, 63
|
||||
; CHECK-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xl_sext_d:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: sldi r3, r3, 3
|
||||
; CHECK-O0-NEXT: ldx r3, r4, r3
|
||||
; CHECK-O0-NEXT: sradi r4, r3, 63
|
||||
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset
|
||||
%0 = load i64, i64* %add.ptr, align 8
|
||||
%conv = sext i64 %0 to i128
|
||||
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
|
||||
ret <1 x i128> %splat.splatinsert
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user