1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[mips][msa] Added support for matching shf from normal IR (i.e. not intrinsics)

llvm-svn: 191302
This commit is contained in:
Daniel Sanders 2013-09-24 14:20:00 +00:00
parent 7c64721346
commit db41b542e8
7 changed files with 149 additions and 11 deletions

View File

@ -211,6 +211,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
O << MipsFCCToString((Mips::CondCode)MO.getImm());
}
// Operand printer hook named for SHF masks (presumably the shuffle immediate
// of the MSA shf instructions - confirm against the operand definition).
// There is no implementation yet: reaching this function is treated as
// unreachable.
void MipsInstPrinter::printSHFMask(const MCInst *MI, int opNum,
                                   raw_ostream &O) {
  llvm_unreachable("TODO");
}
bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
unsigned OpNo, raw_ostream &OS) {
OS << "\t" << Str << "\t";

View File

@ -96,6 +96,7 @@ private:
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
raw_ostream &OS);

View File

@ -225,6 +225,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT";
case MipsISD::VNOR: return "MipsISD::VNOR";
case MipsISD::VSHF: return "MipsISD::VSHF";
case MipsISD::SHF: return "MipsISD::SHF";
default: return NULL;
}
}

View File

@ -174,6 +174,7 @@ namespace llvm {
// Vector Shuffle with mask as an operand
VSHF, // Generic shuffle
SHF, // 4-element set shuffle.
// Combined (XOR (OR $a, $b), -1)
VNOR,

View File

@ -23,6 +23,8 @@ def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
SDTCisInt<1>, SDTCisVec<1>,
SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>;
def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
@ -39,6 +41,7 @@ def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp,
def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>;
def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>;
def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
@ -1074,6 +1077,16 @@ class MSA_I8_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass Itinerary = itin;
}
// Description base class for instructions selected from the MipsSHF node.
//
// instr_asm - mnemonic placed at the front of the assembly string.
// RCWD      - register class of the result operand $wd.
// RCWS      - register class of the source operand $ws; defaults to RCWD.
// itin      - instruction itinerary; defaults to NoItinerary.
class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterClass RCWD,
RegisterClass RCWS = RCWD,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs RCWD:$wd);
dag InOperandList = (ins RCWS:$ws, uimm8:$u8);
// Assembly operands are printed as: destination, source, immediate.
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
// Note the node operand order: the immediate comes first and the source
// vector second, which is the reverse of the instruction's input list.
list<dag> Pattern = [(set RCWD:$wd, (MipsSHF immZExt8:$u8, RCWS:$ws))];
InstrItinClass Itinerary = itin;
}
class MSA_I10_LDI_DESC_BASE<string instr_asm, RegisterClass RCWD,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs RCWD:$wd);
@ -2066,9 +2079,9 @@ class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>;
class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>;
class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>;
class SHF_B_DESC : MSA_I8_X_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>;
class SHF_H_DESC : MSA_I8_X_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>;
class SHF_W_DESC : MSA_I8_X_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128B>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128H>;
class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128W>;
class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>;
class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>;

View File

@ -1428,6 +1428,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_pcnt_w:
case Intrinsic::mips_pcnt_d:
return lowerMSAUnaryIntr(Op, DAG, ISD::CTPOP);
case Intrinsic::mips_shf_b:
case Intrinsic::mips_shf_h:
case Intrinsic::mips_shf_w:
return DAG.getNode(MipsISD::SHF, SDLoc(Op), Op->getValueType(0),
Op->getOperand(2), Op->getOperand(1));
case Intrinsic::mips_sll_b:
case Intrinsic::mips_sll_h:
case Intrinsic::mips_sll_w:
@ -1735,6 +1740,72 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
// Try to lower a VECTOR_SHUFFLE to a MipsISD::SHF node.
//
// SHF treats the vector as consecutive groups of four elements and applies
// the same 4-way permutation inside every group. The permutation is given as
// four 2-bit selectors packed into one integer immediate, with the selector
// for position 0 of the group in the least significant bits.
//
// A shuffle mask is therefore suitable when it has the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// with a, b, c and d each in the range 0..3. Undef mask elements are
// compatible with anything, and a selector that is undef in every group is
// encoded as zero.
//
// For example:
//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
//                                 i32 7, i32 6, i32 5, i32 4>
// becomes:
//   (SHF_H $w0, $w1, 27)
// because 27 == 3 + (2 << 2) + (1 << 4) + (0 << 6).
//
// Op      - the shuffle being lowered; only its first operand is used as the
//           source vector of the resulting node.
// ResTy   - result type of the new node.
// Indices - the shuffle mask, with -1 standing for undef.
//
// Returns the new SHF node, or an empty SDValue when the mask does not have
// the required form (including masks with fewer than four elements).
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
                                       SmallVector<int, 16> Indices,
                                       SelectionDAG &DAG) {
  const unsigned NumIndices = Indices.size();
  if (NumIndices < 4)
    return SDValue();

  // Selector[k] is the in-group source element for position k of every group,
  // or -1 while only undefs have been seen for that position.
  int Selector[4] = { -1, -1, -1, -1 };

  for (unsigned Pos = 0; Pos != NumIndices; ++Pos) {
    int Rel = Indices[Pos];

    // An undef element never constrains the selector.
    if (Rel == -1)
      continue;

    // Rebase the index onto the start of the group containing Pos. SHF cannot
    // pull an element from a different group, so anything outside 0..3 means
    // this shuffle is not representable.
    Rel -= 4 * (Pos / 4);
    if (Rel < 0 || Rel >= 4)
      return SDValue();

    // The first defined value for a position fixes its selector; every later
    // group has to agree with it.
    int &Want = Selector[Pos % 4];
    if (Want == -1)
      Want = Rel;
    else if (Want != Rel)
      return SDValue();
  }

  // Pack the four selectors, two bits each, position 0 lowest. Positions that
  // stayed undef everywhere contribute zero.
  unsigned Packed = 0;
  for (unsigned K = 0; K < 4; ++K) {
    const int Sel = Selector[K] == -1 ? 0 : Selector[K];
    Packed |= (Sel & 0x3) << (2 * K);
  }
  const APInt Imm(32, Packed);

  return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
                     DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
}
// Lower VECTOR_SHUFFLE into VSHF.
//
// This mostly consists of converting the shuffle indices in Indices into a
@ -1802,6 +1873,9 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
for (int i = 0; i < ResTyNumElts; ++i)
Indices.push_back(Node->getMaskElt(i));
SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
if (Result.getNode())
return Result;
return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}

View File

@ -156,14 +156,16 @@ define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK: .size vshf_v8i16_4
}
; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
; instruction when using a single vector.
define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: vshf_v4i32_0:
%1 = load <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
@ -177,8 +179,7 @@ define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
%1 = load <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
@ -193,8 +194,7 @@ define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
%2 = load <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
store <4 x i32> %3, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
@ -225,8 +225,7 @@ define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
%1 = load <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
@ -311,3 +310,47 @@ define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
ret void
; CHECK: .size vshf_v2i64_4
}
; Single-input <16 x i8> shuffle whose mask repeats the pattern <1, 3, 2, 0>
; in each of its four 4-element groups. A single shf.b is expected, with the
; immediate 1 + (3 << 2) + (2 << 4) + (0 << 6) = 45.
define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: shf_v16i8_0:
%1 = load <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
store <16 x i8> %2, <16 x i8>* %c
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
; CHECK: .size shf_v16i8_0
}
; Single-input <8 x i16> shuffle that reverses the elements inside each of its
; two 4-element groups (pattern <3, 2, 1, 0>). A single shf.h is expected,
; with the immediate 3 + (2 << 2) + (1 << 4) + (0 << 6) = 27.
define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: shf_v8i16_0:
%1 = load <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
store <8 x i16> %2, <8 x i16>* %c
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
; CHECK: .size shf_v8i16_0
}
; Single-input <4 x i32> shuffle that reverses its only 4-element group
; (pattern <3, 2, 1, 0>). A single shf.w is expected, with the immediate
; 3 + (2 << 2) + (1 << 4) + (0 << 6) = 27.
define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: shf_v4i32_0:
%1 = load <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
; CHECK: .size shf_v4i32_0
}
; shf.d does not exist