mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
R600/SI: Remove SIISelLowering::legalizeOperands()
Its functionality has been replaced by calling SIInstrInfo::legalizeOperands() from SIISelLowering::AdjustInstrPostInstrSelection() and running the SIFoldOperands and SIShrinkInstructions passes. llvm-svn: 225445
This commit is contained in:
parent
9ab7f6e415
commit
d8d9d6ab95
@ -1690,12 +1690,6 @@ static bool isVSrc(unsigned RegClass) {
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Test if RegClass is one of the SSrc classes
|
||||
static bool isSSrc(unsigned RegClass) {
|
||||
return AMDGPU::SSrc_32RegClassID == RegClass ||
|
||||
AMDGPU::SSrc_64RegClassID == RegClass;
|
||||
}
|
||||
|
||||
/// \brief Analyze the possible immediate value Op
|
||||
///
|
||||
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
|
||||
@ -1728,44 +1722,6 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/// \brief Try to fold an immediate directly into an instruction
|
||||
bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
|
||||
bool &ScalarSlotUsed) const {
|
||||
|
||||
MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
|
||||
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
|
||||
getTargetMachine().getSubtargetImpl()->getInstrInfo());
|
||||
if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
|
||||
return false;
|
||||
|
||||
const SDValue &Op = Mov->getOperand(0);
|
||||
int32_t Value = analyzeImmediate(Op.getNode());
|
||||
if (Value == -1) {
|
||||
// Not an immediate at all
|
||||
return false;
|
||||
|
||||
} else if (Value == 0) {
|
||||
// Inline immediates can always be folded
|
||||
Operand = Op;
|
||||
return true;
|
||||
|
||||
} else if (Value == Immediate) {
|
||||
// Already folded literal immediate
|
||||
Operand = Op;
|
||||
return true;
|
||||
|
||||
} else if (!ScalarSlotUsed && !Immediate) {
|
||||
// Fold this literal immediate
|
||||
ScalarSlotUsed = true;
|
||||
Immediate = Value;
|
||||
Operand = Op;
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *SITargetLowering::getRegClassForNode(
|
||||
SelectionDAG &DAG, const SDValue &Op) const {
|
||||
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
|
||||
@ -1829,133 +1785,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
|
||||
return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
|
||||
}
|
||||
|
||||
/// \returns true if \p Node's operands are different from the SDValue list
|
||||
/// \p Ops
|
||||
static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
|
||||
for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
|
||||
if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// TODO: This needs to be removed. Its current primary purpose is to fold
|
||||
/// immediates into operands when legal. The legalization parts are redundant
|
||||
/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
|
||||
SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
|
||||
SelectionDAG &DAG) const {
|
||||
// Original encoding (either e32 or e64)
|
||||
int Opcode = Node->getMachineOpcode();
|
||||
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
|
||||
getTargetMachine().getSubtargetImpl()->getInstrInfo());
|
||||
const MCInstrDesc *Desc = &TII->get(Opcode);
|
||||
|
||||
unsigned NumDefs = Desc->getNumDefs();
|
||||
unsigned NumOps = Desc->getNumOperands();
|
||||
|
||||
// Commuted opcode if available
|
||||
int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
|
||||
const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
|
||||
|
||||
assert(!DescRev || DescRev->getNumDefs() == NumDefs);
|
||||
assert(!DescRev || DescRev->getNumOperands() == NumOps);
|
||||
|
||||
int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
|
||||
bool HaveVSrc = false, HaveSSrc = false;
|
||||
|
||||
// First figure out what we already have in this instruction.
|
||||
for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
|
||||
i != e && Op < NumOps; ++i, ++Op) {
|
||||
|
||||
unsigned RegClass = Desc->OpInfo[Op].RegClass;
|
||||
if (isVSrc(RegClass))
|
||||
HaveVSrc = true;
|
||||
else if (isSSrc(RegClass))
|
||||
HaveSSrc = true;
|
||||
else
|
||||
continue;
|
||||
|
||||
int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
|
||||
if (Imm != -1 && Imm != 0) {
|
||||
// Literal immediate
|
||||
Immediate = Imm;
|
||||
}
|
||||
}
|
||||
|
||||
// If we neither have VSrc nor SSrc, it makes no sense to continue.
|
||||
if (!HaveVSrc && !HaveSSrc)
|
||||
return Node;
|
||||
|
||||
// No scalar allowed when we have both VSrc and SSrc
|
||||
bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
|
||||
|
||||
// If this instruction has an implicit use of VCC, then it can't use the
|
||||
// constant bus.
|
||||
for (unsigned i = 0, e = Desc->getNumImplicitUses(); i != e; ++i) {
|
||||
if (Desc->ImplicitUses[i] == AMDGPU::VCC) {
|
||||
ScalarSlotUsed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Second go over the operands and try to fold them
|
||||
std::vector<SDValue> Ops;
|
||||
for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
|
||||
i != e && Op < NumOps; ++i, ++Op) {
|
||||
|
||||
const SDValue &Operand = Node->getOperand(i);
|
||||
Ops.push_back(Operand);
|
||||
|
||||
// Already folded immediate?
|
||||
if (isa<ConstantSDNode>(Operand.getNode()) ||
|
||||
isa<ConstantFPSDNode>(Operand.getNode()))
|
||||
continue;
|
||||
|
||||
// Is this a VSrc or SSrc operand?
|
||||
unsigned RegClass = Desc->OpInfo[Op].RegClass;
|
||||
if (isVSrc(RegClass) || isSSrc(RegClass)) {
|
||||
// Try to fold the immediates. If this ends up with multiple constant bus
|
||||
// uses, it will be legalized later.
|
||||
foldImm(Ops[i], Immediate, ScalarSlotUsed);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
|
||||
|
||||
unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
|
||||
assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
|
||||
|
||||
// Test if it makes sense to swap operands
|
||||
if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
|
||||
(!fitsRegClass(DAG, Ops[1], RegClass) &&
|
||||
fitsRegClass(DAG, Ops[1], OtherRegClass))) {
|
||||
|
||||
// Swap commutable operands
|
||||
std::swap(Ops[0], Ops[1]);
|
||||
|
||||
Desc = DescRev;
|
||||
DescRev = nullptr;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional chain and glue
|
||||
for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
|
||||
Ops.push_back(Node->getOperand(i));
|
||||
|
||||
// Nodes that have a glue result are not CSE'd by getMachineNode(), so in
|
||||
// this case a brand new node is always created, even if the operands
|
||||
// are the same as before. So, manually check if anything has been changed.
|
||||
if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) {
|
||||
return Node;
|
||||
}
|
||||
|
||||
// Create a complete new instruction
|
||||
return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||
}
|
||||
|
||||
/// \brief Helper function for adjustWritemask
|
||||
static unsigned SubIdx2Lane(unsigned Idx) {
|
||||
switch (Idx) {
|
||||
@ -2084,8 +1913,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
|
||||
legalizeTargetIndependentNode(Node, DAG);
|
||||
return Node;
|
||||
}
|
||||
|
||||
return legalizeOperands(Node, DAG);
|
||||
return Node;
|
||||
}
|
||||
|
||||
/// \brief Assign the register class depending on the number of
|
||||
|
@ -42,14 +42,11 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
bool foldImm(SDValue &Operand, int32_t &Immediate,
|
||||
bool &ScalarSlotUsed) const;
|
||||
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
|
||||
const SDValue &Op) const;
|
||||
bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
|
||||
unsigned RegClass) const;
|
||||
|
||||
SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
|
||||
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
|
||||
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
|
||||
|
||||
|
@ -48,7 +48,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
|
||||
; R600: -KC0[2].Z
|
||||
|
||||
; XXX: We could use v_add_f32_e64 with the negate bit here instead.
|
||||
; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
|
||||
; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
|
||||
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
|
||||
%bc = bitcast i32 %in to float
|
||||
%fsub = fsub float 0.0, %bc
|
||||
|
@ -112,7 +112,7 @@ define void @store_literal_imm_f32(float addrspace(1)* %out) {
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, 0.0
|
||||
@ -304,7 +304,7 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
|
||||
; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0.0
|
||||
|
@ -87,6 +87,27 @@ entry:
|
||||
store i32 %tmp1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
; CHECK-LABEL: {{^}}vector_imm:
|
||||
; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64
|
||||
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
|
||||
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
|
||||
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
|
||||
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
|
||||
|
||||
define void @vector_imm(<4 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = call i32 @llvm.r600.read.tidig.x()
|
||||
%tmp1 = add i32 %tmp0, 1
|
||||
%tmp2 = add i32 %tmp0, 2
|
||||
%tmp3 = add i32 %tmp0, 3
|
||||
%vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
|
||||
%vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
|
||||
%vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
|
||||
%vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
|
||||
%tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3
|
||||
store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
attributes #0 = { readnone }
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0.0, 1.0, [[CMP]]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
|
||||
define void @main(float %p) {
|
||||
main_body:
|
||||
%c = fcmp oeq float %p, %p
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0.0, 1.0, [[CMP]]
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
|
||||
define void @main(float %p) {
|
||||
main_body:
|
||||
%c = fcmp une float %p, %p
|
||||
|
@ -45,9 +45,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
|
||||
|
||||
; SI-LABEL: @v_sint_to_fp_i64_to_f64
|
||||
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
|
||||
; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
|
||||
; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
|
||||
; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
|
||||
; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
|
||||
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
|
@ -42,7 +42,7 @@ define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
|
||||
|
||||
; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0.0, 1.0, [[CMP]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
|
||||
|
@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
|
||||
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
|
||||
; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
|
||||
; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
|
||||
; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
|
||||
; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
|
||||
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
|
@ -60,7 +60,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0.0, 1.0, [[CMP]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
|
||||
|
@ -39,7 +39,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
|
||||
; FUNC-LABEL: {{^}}xor_i1:
|
||||
; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
|
||||
|
||||
; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0.0
|
||||
; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
|
||||
; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
|
||||
; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
|
||||
|
Loading…
Reference in New Issue
Block a user