mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
R600/SI: Match sign_extend_inreg to s_sext_i32_i8 and s_sext_i32_i16
llvm-svn: 206547
This commit is contained in:
parent
3756e7abc3
commit
42cf57d738
@ -165,6 +165,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::i1, Custom);
|
||||
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
|
||||
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
|
||||
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
|
||||
|
@ -537,6 +537,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
|
||||
case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
|
||||
case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
|
||||
case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
|
||||
case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
|
||||
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
|
||||
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
|
||||
case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
|
||||
case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
|
||||
@ -915,8 +917,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
MachineBasicBlock *MBB = Inst->getParent();
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
|
||||
unsigned Opcode = Inst->getOpcode();
|
||||
|
||||
// Handle some special cases
|
||||
switch(Inst->getOpcode()) {
|
||||
switch (Opcode) {
|
||||
case AMDGPU::S_MOV_B64: {
|
||||
DebugLoc DL = Inst->getDebugLoc();
|
||||
|
||||
@ -988,26 +992,28 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
Inst->RemoveOperand(i);
|
||||
}
|
||||
|
||||
// Add the implicit and explicit register definitions.
|
||||
if (NewDesc.ImplicitUses) {
|
||||
for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
|
||||
unsigned Reg = NewDesc.ImplicitUses[i];
|
||||
Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
|
||||
}
|
||||
if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
|
||||
// We are converting these to a BFE, so we need to add the missing
|
||||
// operands for the size and offset.
|
||||
unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(Size));
|
||||
|
||||
// XXX - Other pointless operands. There are 4, but it seems you only need
|
||||
// 3 to not hit an assertion later in MCInstLower.
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
}
|
||||
|
||||
if (NewDesc.ImplicitDefs) {
|
||||
for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
|
||||
unsigned Reg = NewDesc.ImplicitDefs[i];
|
||||
Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
|
||||
}
|
||||
}
|
||||
addDescImplicitUseDef(NewDesc, Inst);
|
||||
|
||||
// Update the destination register class.
|
||||
|
||||
const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
|
||||
|
||||
switch (Inst->getOpcode()) {
|
||||
switch (Opcode) {
|
||||
// For target instructions, getOpRegClass just returns the virtual
|
||||
// register class associated with the operand, so we need to find an
|
||||
// equivalent VGPR register class in order to move the instruction to the
|
||||
@ -1120,6 +1126,24 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
|
||||
Worklist.push_back(HiHalf);
|
||||
}
|
||||
|
||||
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
|
||||
MachineInstr *Inst) const {
|
||||
// Add the implict and explicit register definitions.
|
||||
if (NewDesc.ImplicitUses) {
|
||||
for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
|
||||
unsigned Reg = NewDesc.ImplicitUses[i];
|
||||
Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
|
||||
}
|
||||
}
|
||||
|
||||
if (NewDesc.ImplicitDefs) {
|
||||
for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
|
||||
unsigned Reg = NewDesc.ImplicitDefs[i];
|
||||
Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
|
||||
MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
|
@ -47,6 +47,7 @@ private:
|
||||
void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
|
||||
MachineInstr *Inst, unsigned Opcode) const;
|
||||
|
||||
/// Add the registers listed in \p Desc's ImplicitUses and ImplicitDefs to
/// \p MI as implicit register operands (uses first, then defs).
void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
|
||||
|
||||
public:
|
||||
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
|
||||
|
@ -68,8 +68,12 @@ def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
|
||||
//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
|
||||
def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
|
||||
//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
|
||||
//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
|
||||
//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
|
||||
// S_SEXT_I32_I8: selected for a sext_inreg of an i32 value from i8.
def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8",
|
||||
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
|
||||
>;
|
||||
// S_SEXT_I32_I16: selected for a sext_inreg of an i32 value from i16.
def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
|
||||
[(set i32:$dst, (sext_inreg i32:$src0, i16))]
|
||||
>;
|
||||
////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
|
||||
////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
|
||||
////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
|
||||
@ -124,6 +128,21 @@ def S_CMPK_EQ_I32 : SOPK <
|
||||
>;
|
||||
*/
|
||||
|
||||
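// Handle sext_inreg in i64: sign-extend the low dword (sub0) with
// S_SEXT_I32_I8 / S_SEXT_I32_I16 and assemble the 64-bit result with
// INSERT_SUBREG.
// FIXME: The high dword (sub1) is unconditionally set to -1, which is only
// correct when the sign-extended value is negative. It should replicate the
// sign bit of the extended low dword (e.g. an arithmetic shift right by 31).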
|
||||
def : Pat <
|
||||
(i64 (sext_inreg i64:$src, i8)),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
|
||||
(S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
|
||||
(S_MOV_B32 -1), sub1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i64 (sext_inreg i64:$src, i16)),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
|
||||
(S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
|
||||
(S_MOV_B32 -1), sub1)
|
||||
>;
|
||||
|
||||
let isCompare = 1 in {
|
||||
def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
|
||||
def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
|
||||
|
@ -21,8 +21,9 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_i8_to_i32
|
||||
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
|
||||
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
|
||||
; SI: BUFFER_STORE_DWORD [[EXTRACT]],
|
||||
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
|
||||
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
|
||||
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
|
||||
; EG: ADD_INT
|
||||
@ -38,8 +39,9 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_i16_to_i32
|
||||
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
|
||||
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16
|
||||
; SI: BUFFER_STORE_DWORD [[EXTRACT]],
|
||||
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
|
||||
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
|
||||
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
|
||||
; EG: ADD_INT
|
||||
@ -55,8 +57,9 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
|
||||
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
|
||||
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
|
||||
; SI: BUFFER_STORE_DWORD [[EXTRACT]],
|
||||
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
|
||||
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
|
||||
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
|
||||
; EG: ADD_INT
|
||||
@ -71,9 +74,10 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_i8_to_i64
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
|
||||
; SI: BUFFER_STORE_DWORD
|
||||
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
|
||||
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
|
||||
; SI: S_MOV_B32 {{s[0-9]+}}, -1
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
|
||||
@ -94,9 +98,10 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_i16_to_i64
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 16
|
||||
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
|
||||
; SI: BUFFER_STORE_DWORD
|
||||
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
|
||||
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
|
||||
; SI: S_MOV_B32 {{s[0-9]+}}, -1
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
|
||||
@ -237,8 +242,8 @@ define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
|
||||
@ -254,10 +259,10 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: BUFFER_STORE_DWORDX4
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
|
||||
@ -275,8 +280,8 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
|
||||
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
|
||||
@ -285,8 +290,8 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
|
||||
; EG: LSHR {{\*?}} [[ADDR]]
|
||||
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
|
||||
%shl = shl <2 x i32> %c, <i32 24, i32 24>
|
||||
%ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
|
||||
%shl = shl <2 x i32> %c, <i32 16, i32 16>
|
||||
%ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
|
||||
store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user