1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

ARM NEON refactor VST2 w/ writeback instructions.

In addition to improving the representation, this adds support for assembly
parsing of these instructions.

llvm-svn: 146588
This commit is contained in:
Jim Grosbach 2011-12-14 21:32:11 +00:00
parent 7fe9f4b949
commit 2dac770227
4 changed files with 130 additions and 54 deletions

View File

@ -308,18 +308,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
@ -1193,12 +1199,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
case ARM::VST2d8Pseudo_UPD:
case ARM::VST2d16Pseudo_UPD:
case ARM::VST2d32Pseudo_UPD:
case ARM::VST2q8Pseudo_UPD:
case ARM::VST2q16Pseudo_UPD:
case ARM::VST2q32Pseudo_UPD:
case ARM::VST2d8PseudoWB_fixed:
case ARM::VST2d16PseudoWB_fixed:
case ARM::VST2d32PseudoWB_fixed:
case ARM::VST2q8PseudoWB_fixed:
case ARM::VST2q16PseudoWB_fixed:
case ARM::VST2q32PseudoWB_fixed:
case ARM::VST2d8PseudoWB_register:
case ARM::VST2d16PseudoWB_register:
case ARM::VST2d32PseudoWB_register:
case ARM::VST2q8PseudoWB_register:
case ARM::VST2q16PseudoWB_register:
case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:

View File

@ -1589,6 +1589,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@ -1806,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
@ -2889,10 +2895,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
ARM::VST2q32Pseudo_UPD };
unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
ARM::VST2d16PseudoWB_fixed,
ARM::VST2d32PseudoWB_fixed,
ARM::VST1q64PseudoWB_fixed};
unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}

View File

@ -1523,44 +1523,90 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd),
IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
//class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
// : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
// (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd),
// IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
// let Inst{5-4} = Rn{5-4};
// let DecoderMethod = "DecodeVSTInstruction";
//}
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
RegisterOperand VdTy> {
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
class VST2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u,
"vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
//class VST2QWB<bits<4> op7_4, string Dt>
// : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
// (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u,
// "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
// let Inst{5-4} = Rn{5-4};
// let DecoderMethod = "DecodeVSTInstruction";
//}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
let AsmMatchConverter = "cvtVSTwbRegister";
}
}
def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>

View File

@ -2221,15 +2221,24 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1d16Qwb_register:
case ARM::VST1d32Qwb_register:
case ARM::VST1d64Qwb_register:
case ARM::VST2d8_UPD:
case ARM::VST2d16_UPD:
case ARM::VST2d32_UPD:
case ARM::VST2q8_UPD:
case ARM::VST2q16_UPD:
case ARM::VST2q32_UPD:
case ARM::VST2b8_UPD:
case ARM::VST2b16_UPD:
case ARM::VST2b32_UPD:
case ARM::VST2d8wb_fixed:
case ARM::VST2d16wb_fixed:
case ARM::VST2d32wb_fixed:
case ARM::VST2d8wb_register:
case ARM::VST2d16wb_register:
case ARM::VST2d32wb_register:
case ARM::VST2q8wb_fixed:
case ARM::VST2q16wb_fixed:
case ARM::VST2q32wb_fixed:
case ARM::VST2q8wb_register:
case ARM::VST2q16wb_register:
case ARM::VST2q32wb_register:
case ARM::VST2b8wb_fixed:
case ARM::VST2b16wb_fixed:
case ARM::VST2b32wb_fixed:
case ARM::VST2b8wb_register:
case ARM::VST2b16wb_register:
case ARM::VST2b32wb_register:
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD: