1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[PowerPC] Implemented Vector Load with Zero and Signed Extend Builtins

This patch implements the Vector Load with Zero and Signed Extend builtins (lxvr_x for b, h, w, d) and adds the appropriate test cases for them. The builtins utilize the vector load instructions introduced with ISA 3.1.

Differential Revision: 	https://reviews.llvm.org/D82502#inline-797941
This commit is contained in:
Albion Fung 2020-08-28 11:27:07 -05:00
parent 34d811b345
commit c16246d9bf
4 changed files with 251 additions and 0 deletions

View File

@ -1527,6 +1527,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PPCISD::STRICT_FCFIDS";
case PPCISD::STRICT_FCFIDUS:
return "PPCISD::STRICT_FCFIDUS";
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
}
return nullptr;
}
@ -13639,6 +13640,46 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
// Look for the pattern of a load from a narrow width to i128, feeding
// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
// (LXVRZX). This node represents a zero extending load that will be matched
// to the Load VSX Vector Rightmost instructions.
//
// N is the BUILD_VECTOR node under consideration. Returns the replacement
// PPCISD::LXVRZX memory-intrinsic node, or an empty SDValue when the pattern
// does not match.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);

  // This combine is only eligible for a BUILD_VECTOR of v1i128.
  if (N->getValueType(0) != MVT::v1i128)
    return SDValue();

  SDValue Operand = N->getOperand(0);
  // Proceed with the transformation if the operand to the BUILD_VECTOR
  // is a load instruction.
  if (Operand.getOpcode() != ISD::LOAD)
    return SDValue();

  // The opcode was just checked to be ISD::LOAD, so the node is known to be a
  // LoadSDNode. Use cast<> (which asserts on a mismatch) rather than
  // dyn_cast<>, whose possibly-null result was dereferenced unchecked.
  auto *LD = cast<LoadSDNode>(Operand);
  EVT MemoryType = LD->getMemoryVT();

  // This transformation is only valid if we are loading either a byte,
  // halfword, word, or doubleword.
  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
                     MemoryType == MVT::i32 || MemoryType == MVT::i64;

  // Ensure that the load from the narrow width is being zero extended to
  // i128. An any-extending load (EXTLOAD) is accepted as well; sign-extending
  // and non-extending loads are rejected.
  if (!ValidLDType ||
      (LD->getExtensionType() != ISD::ZEXTLOAD &&
       LD->getExtensionType() != ISD::EXTLOAD))
    return SDValue();

  // Operands of the new node: chain, base pointer, and the width of the
  // loaded memory type in bits as a pointer-sized constant.
  SDValue LoadOps[] = {
      LD->getChain(), LD->getBasePtr(),
      DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};

  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
                                 DAG.getVTList(MVT::v1i128, MVT::Other),
                                 LoadOps, MemoryType, LD->getMemOperand());
}
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@ -13676,6 +13717,14 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
return Reduced;
}
// On Power10, the Load VSX Vector Rightmost instructions can be utilized
// if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
// is a load from <valid narrow width> to i128.
if (Subtarget.isISA3_1()) {
SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
if (BVOfZLoad)
return BVOfZLoad;
}
if (N->getValueType(0) != MVT::v2f64)
return SDValue();

View File

@ -494,6 +494,12 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
/// LXVRZX - Load VSX Vector Rightmost and Zero Extend
/// This node represents v1i128 BUILD_VECTOR of a zero extending load
/// instruction from <byte, halfword, word, or doubleword> to i128.
/// Allows utilization of the Load VSX Vector Rightmost Instructions.
LXVRZX,
/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
/// the vector type to load vector in big-endian element order.

View File

@ -18,6 +18,15 @@ def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
// address computations).
class isPCRel { bit PCRel = 1; }
// PowerPC specific type constraints.
// Type profile for the LXVRZX node: one result and two operands. The result
// is constrained to v1i128 and both operands to the pointer type.
// NOTE(review): operand 1 looks like the load address and operand 2 the
// memory width in bits (the selection patterns match it against 8/16/32/64)
// -- confirm against the code that creates the node.
def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
]>;
// PPC Specific DAG Nodes.
// TableGen handle for the PPCISD::LXVRZX node, typed by SDT_PPCLXVRZX.
// The node is declared as carrying a chain and as possibly reading memory.
def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
[SDNPHasChain, SDNPMayLoad]>;
// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin> : Instruction {
@ -1326,6 +1335,15 @@ let Predicates = [IsISA3_1] in {
(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
// Select PPClxvrzx according to its constant second operand: 8, 16, 32 and 64
// map to LXVRBX, LXVRHX, LXVRWX and LXVRDX respectively. The instruction
// result is copied into the VRRC register class to form the v1i128 value.
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
}
let AddedComplexity = 400, Predicates = [IsISA3_1] in {

View File

@ -239,3 +239,181 @@ entry:
store i64 %conv, i64* %add.ptr, align 8
ret void
}
; Zero-extending i8 load at a byte offset, inserted into a <1 x i128>.
; Expected codegen is a single lxvrbx; under the CHECK-O0 prefix the load
; goes to vs0 and is then copied into v2 with xxlor.
define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
; CHECK-LABEL: vec_xl_zext:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvrbx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: lxvrbx vs0, r4, r3
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
%0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Zero-extending i16 load at an element offset, inserted into a <1 x i128>.
; Expected codegen scales the offset by 2 (sldi ..., 1) and issues lxvrhx;
; under the CHECK-O0 prefix the result is copied from vs0 into v2 with xxlor.
define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
; CHECK-LABEL: vec_xl_zext_short:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 1
; CHECK-NEXT: lxvrhx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext_short:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 1
; CHECK-O0-NEXT: lxvrhx vs0, r4, r3
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
%0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Zero-extending i32 load at an element offset, inserted into a <1 x i128>.
; Expected codegen scales the offset by 4 (sldi ..., 2) and issues lxvrwx;
; under the CHECK-O0 prefix the result is copied from vs0 into v2 with xxlor.
define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
; CHECK-LABEL: vec_xl_zext_word:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 2
; CHECK-NEXT: lxvrwx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext_word:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 2
; CHECK-O0-NEXT: lxvrwx vs0, r4, r3
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
%0 = load i32, i32* %add.ptr, align 4
%conv = zext i32 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Zero-extending i64 load at an element offset, inserted into a <1 x i128>.
; Expected codegen scales the offset by 8 (sldi ..., 3) and issues lxvrdx;
; under the CHECK-O0 prefix the result is copied from vs0 into v2 with xxlor.
define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
; CHECK-LABEL: vec_xl_zext_dw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lxvrdx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext_dw:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 3
; CHECK-O0-NEXT: lxvrdx vs0, r4, r3
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
%0 = load i64, i64* %add.ptr, align 8
%conv = zext i64 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Sign-extending i8 load inserted into a <1 x i128>. No lxvrbx is expected
; here: the checks show a scalar lbzx + extsb, an sradi by 63 to produce the
; sign bits, and mtvsrdd to assemble the vector. Both prefixes expect the
; same sequence.
define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
; CHECK-LABEL: vec_xl_sext_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lbzx r3, r4, r3
; CHECK-NEXT: extsb r3, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_b:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: lbzx r3, r4, r3
; CHECK-O0-NEXT: extsb r3, r3
; CHECK-O0-NEXT: sradi r4, r3, 63
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset
%0 = load i8, i8* %add.ptr, align 1
%conv = sext i8 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Sign-extending i16 load inserted into a <1 x i128>. No lxvrhx is expected
; here: the checks show a scaled offset (sldi ..., 1), a scalar lhax, an sradi
; by 63 to produce the sign bits, and mtvsrdd to assemble the vector. Both
; prefixes expect the same sequence.
define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
; CHECK-LABEL: vec_xl_sext_h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 1
; CHECK-NEXT: lhax r3, r4, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_h:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 1
; CHECK-O0-NEXT: lhax r3, r4, r3
; CHECK-O0-NEXT: sradi r4, r3, 63
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i16, i16* %p, i64 %offset
%0 = load i16, i16* %add.ptr, align 2
%conv = sext i16 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Sign-extending i32 load inserted into a <1 x i128>. No lxvrwx is expected
; here: the checks show a scaled offset (sldi ..., 2), a scalar lwax, an sradi
; by 63 to produce the sign bits, and mtvsrdd to assemble the vector. Both
; prefixes expect the same sequence.
define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
; CHECK-LABEL: vec_xl_sext_w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 2
; CHECK-NEXT: lwax r3, r4, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_w:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 2
; CHECK-O0-NEXT: lwax r3, r4, r3
; CHECK-O0-NEXT: sradi r4, r3, 63
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset
%0 = load i32, i32* %add.ptr, align 4
%conv = sext i32 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}
; Sign-extending i64 load inserted into a <1 x i128>. No lxvrdx is expected
; here: the checks show a scaled offset (sldi ..., 3), a scalar ldx, an sradi
; by 63 to produce the sign bits, and mtvsrdd to assemble the vector. Both
; prefixes expect the same sequence.
define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
; CHECK-LABEL: vec_xl_sext_d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: ldx r3, r4, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_d:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 3
; CHECK-O0-NEXT: ldx r3, r4, r3
; CHECK-O0-NEXT: sradi r4, r3, 63
; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset
%0 = load i64, i64* %add.ptr, align 8
%conv = sext i64 %0 to i128
%splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
ret <1 x i128> %splat.splatinsert
}