1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

R600/SI: add commuting of rev instructions

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
llvm-svn: 178127
This commit is contained in:
Christian Konig 2013-03-27 09:12:59 +00:00
parent 231ee3f1ae
commit fb305cbcea
8 changed files with 90 additions and 37 deletions

View File

@ -544,6 +544,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
unsigned NumDefs = Desc->getNumDefs(); unsigned NumDefs = Desc->getNumDefs();
unsigned NumOps = Desc->getNumOperands(); unsigned NumOps = Desc->getNumOperands();
// Commuted opcode if available
int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
assert(!DescRev || DescRev->getNumDefs() == NumDefs);
assert(!DescRev || DescRev->getNumOperands() == NumOps);
// e64 version if available, -1 otherwise // e64 version if available, -1 otherwise
int OpcodeE64 = AMDGPU::getVOPe64(Opcode); int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
@ -605,8 +612,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
continue; continue;
} }
if (i == 1 && Desc->isCommutable() && if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
fitsRegClass(DAG, Ops[0], RegClass)) {
unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass; unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass)); assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
@ -620,6 +626,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SDValue Tmp = Ops[1]; SDValue Tmp = Ops[1];
Ops[1] = Ops[0]; Ops[1] = Ops[0];
Ops[0] = Tmp; Ops[0] = Tmp;
Desc = DescRev;
DescRev = 0;
continue; continue;
} }
} }
@ -655,10 +664,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
Ops.push_back(Node->getOperand(i)); Ops.push_back(Node->getOperand(i));
// Either create a complete new or update the current instruction // Create a complete new instruction
if (Promote2e64) return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), Node->getVTList(), Ops.data(), Ops.size());
Node->getVTList(), Ops.data(), Ops.size());
else
return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
} }

View File

@ -158,6 +158,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} }
} }
/// Translate \p Opcode into the opcode of its operand-swapped counterpart.
///
/// The original -> reversed table is consulted first and the
/// reversed -> original table second. If neither table has an entry for
/// \p Opcode, it is returned unchanged.
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  // Forward direction: original opcode -> commuted opcode.
  const int Reversed = AMDGPU::getCommuteRev(Opcode);
  if (Reversed != -1)
    return Reversed;

  // Backward direction: commuted opcode -> original opcode.
  const int Original = AMDGPU::getCommuteOrig(Opcode);
  if (Original != -1)
    return Original;

  // No entry in either table: keep the opcode as it is.
  return Opcode;
}
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const { bool NewMI) const {
@ -165,7 +180,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
!MI->getOperand(2).isReg()) !MI->getOperand(2).isReg())
return 0; return 0;
return TargetInstrInfo::commuteInstruction(MI, NewMI); MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (MI)
MI->setDesc(get(commuteOpcode(MI->getOpcode())));
return MI;
} }
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,

View File

@ -35,6 +35,8 @@ public:
unsigned DestReg, unsigned SrcReg, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const; bool KillSrc) const;
unsigned commuteOpcode(unsigned Opcode) const;
virtual MachineInstr *commuteInstruction(MachineInstr *MI, virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI=false) const; bool NewMI=false) const;
@ -76,6 +78,8 @@ public:
namespace AMDGPU { namespace AMDGPU {
int getVOPe64(uint16_t Opcode); int getVOPe64(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
} // End namespace AMDGPU } // End namespace AMDGPU

View File

@ -138,6 +138,11 @@ class VOP <string opName> {
string OpName = opName; string OpName = opName;
} }
// Mixin that tags a VOP2 instruction with its operand-reversed pairing.
//   RevOp  - string used as the row key by the getCommuteRev and
//            getCommuteOrig instruction mappings.
//   IsOrig - column selector for those mappings. The VOP2 helper
//            multiclasses pass !eq(revOp, opName), so it is 1 when the
//            instruction names itself as its RevOp and 0 when it names
//            another instruction.
class VOP2_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
}
multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src, multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
string opName, list<dag> pattern> { string opName, list<dag> pattern> {
@ -166,11 +171,11 @@ multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>; : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> { string opName, list<dag> pattern, string revOp> {
def _e32 : VOP2 < def _e32 : VOP2 <
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
opName#"_e32 $dst, $src0, $src1", pattern opName#"_e32 $dst, $src0, $src1", pattern
>, VOP <opName>; >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _e64 : VOP3 < def _e64 : VOP3 <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@ -179,23 +184,26 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
i32imm:$abs, i32imm:$clamp, i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg), i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
>, VOP <opName> { >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let SRC2 = SIOperand.ZERO; let SRC2 = SIOperand.ZERO;
} }
} }
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
: VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>; string revOp = opName>
: VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>; string revOp = opName>
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
string revOp = opName> {
def _e32 : VOP2 < def _e32 : VOP2 <
op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
opName#"_e32 $dst, $src0, $src1", pattern opName#"_e32 $dst, $src0, $src1", pattern
>, VOP <opName>; >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _e64 : VOP3b < def _e64 : VOP3b <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@ -204,7 +212,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> {
i32imm:$abs, i32imm:$clamp, i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg), i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
>, VOP <opName> { >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let SRC2 = SIOperand.ZERO; let SRC2 = SIOperand.ZERO;
/* the VOP2 variant puts the carry out into VCC, the VOP3 variant /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
can write it into any SGPR. We currently don't use the carry out, can write it into any SGPR. We currently don't use the carry out,
@ -327,4 +335,22 @@ def getVOPe64 : InstrMapping {
let ValueCols = [["8"]]; let ValueCols = [["8"]];
} }
// Maps an original opcode to its commuted version.
// Only records deriving from VOP2_REV take part. Records are grouped by
// their RevOp string; the lookup key is the record with IsOrig = 1 and the
// result is the record with IsOrig = 0.
// The C++ caller (SIInstrInfo::commuteOpcode) treats -1 as "no mapping".
def getCommuteRev : InstrMapping {
let FilterClass = "VOP2_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["1"];
let ValueCols = [["0"]];
}
// Maps a commuted opcode to its original version.
// Only records deriving from VOP2_REV take part. Records are grouped by
// their RevOp string; the lookup key is the record with IsOrig = 0 and the
// result is the record with IsOrig = 1.
// The C++ caller (SIInstrInfo::commuteOpcode) treats -1 as "no mapping".
def getCommuteOrig : InstrMapping {
let FilterClass = "VOP2_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["0"];
let ValueCols = [["1"]];
}
include "SIInstructions.td" include "SIInstructions.td"

View File

@ -804,13 +804,13 @@ let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
[(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
>; >;
} // End isCommutable = 1
defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
[(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
>; >;
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
} // End isCommutable = 1
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
let isCommutable = 1 in { let isCommutable = 1 in {
@ -848,22 +848,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
} // End isCommutable = 1
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
[(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
>; >;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
[(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
>; >;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
>; >;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
let isCommutable = 1 in {
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
[(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
@ -884,25 +882,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let Defs = [VCC] in { // Carry-out goes to VCC
let isCommutable = 1 in { let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
[(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>; >;
} // End isCommutable = 1
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
[(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>; >;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>;
let Uses = [VCC] in { // Carry-in comes from VCC let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
} // End Uses = [VCC] } // End Uses = [VCC]
} // End Defs = [VCC] } // End isCommutable = 1, Defs = [VCC]
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;

View File

@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
;CHECK: V_LSHL_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 ;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
define void @test(i32 %p) { define void @test(i32 %p) {
%i = mul i32 %p, 2 %i = mul i32 %p, 2

View File

@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
;CHECK: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 ;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
define void @test(i32 %p) { define void @test(i32 %p) {
%i = udiv i32 %p, 2 %i = udiv i32 %p, 2

View File

@ -2,7 +2,7 @@
;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 ;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 ;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
;CHECK-NEXT: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 ;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
define void @test(i32 %p) { define void @test(i32 %p) {
%i = udiv i32 %p, 3 %i = udiv i32 %p, 3