mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
R600: Fix min/max matching problems with unordered compares
The returned operand needs to be permuted for the unordered compares. Also stop incorrectly producing fmin_legacy / fmax_legacy nodes for f64, for which no such instructions exist. llvm-svn: 224094
This commit is contained in:
parent
89a384686e
commit
c1a6f36235
@ -1038,17 +1038,21 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
||||
}
|
||||
|
||||
/// \brief Generate Min/Max node
|
||||
SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return SDValue();
|
||||
|
||||
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
|
||||
return SDValue();
|
||||
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
switch (CCOpcode) {
|
||||
case ISD::SETOEQ:
|
||||
@ -1065,34 +1069,52 @@ SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
|
||||
case ISD::SETO:
|
||||
break;
|
||||
case ISD::SETULE:
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULT: {
|
||||
// Unordered.
|
||||
//
|
||||
// We will allow this before legalization since we expand unordered compares
|
||||
// ordinarily.
|
||||
if (LHS == True)
|
||||
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
|
||||
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
|
||||
}
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETLE:
|
||||
case ISD::SETLT: {
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
break;
|
||||
// Ordered. Assume ordered for undefined.
|
||||
|
||||
// Only do this after legalization to avoid interfering with other combines
|
||||
// which might occur.
|
||||
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
|
||||
!DCI.isCalledByLegalizer())
|
||||
return SDValue();
|
||||
|
||||
// We need to permute the operands to get the correct NaN behavior. The
|
||||
// selected operand is the second one based on the failing compare with NaN,
|
||||
// so permute it based on the compare type the hardware uses.
|
||||
if (LHS == True)
|
||||
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
|
||||
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
|
||||
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
|
||||
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
|
||||
}
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE:
|
||||
case ISD::SETUGE:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETOGT: {
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
break;
|
||||
|
||||
case ISD::SETUGT: {
|
||||
if (LHS == True)
|
||||
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
|
||||
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
|
||||
}
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETOGT: {
|
||||
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
|
||||
!DCI.isCalledByLegalizer())
|
||||
return SDValue();
|
||||
|
||||
if (LHS == True)
|
||||
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
|
||||
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
|
||||
}
|
||||
case ISD::SETCC_INVALID:
|
||||
llvm_unreachable("Invalid setcc condcode!");
|
||||
}
|
||||
@ -2276,24 +2298,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
simplifyI24(N1, DCI);
|
||||
return SDValue();
|
||||
}
|
||||
case ISD::SELECT_CC: {
|
||||
SDLoc DL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (VT == MVT::f32 ||
|
||||
(VT == MVT::f64 &&
|
||||
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
SDValue True = N->getOperand(2);
|
||||
SDValue False = N->getOperand(3);
|
||||
SDValue CC = N->getOperand(4);
|
||||
|
||||
return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case ISD::SELECT: {
|
||||
SDValue Cond = N->getOperand(0);
|
||||
if (Cond.getOpcode() == ISD::SETCC) {
|
||||
@ -2306,11 +2310,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
SDValue True = N->getOperand(1);
|
||||
SDValue False = N->getOperand(2);
|
||||
|
||||
if (VT == MVT::f32 ||
|
||||
(VT == MVT::f64 &&
|
||||
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
|
||||
return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
|
||||
}
|
||||
if (VT == MVT::f32)
|
||||
return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
|
||||
|
||||
// TODO: Implement min / max Evergreen instructions.
|
||||
if (VT == MVT::i32 &&
|
||||
|
@ -145,14 +145,14 @@ public:
|
||||
|
||||
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue CombineFMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue CombineFMinMaxLegacy(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
SDValue True,
|
||||
SDValue False,
|
||||
SDValue CC,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
SDValue CombineIMinMax(SDLoc DL,
|
||||
EVT VT,
|
||||
SDValue LHS,
|
||||
|
@ -1118,6 +1118,13 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
SDValue CC = Op.getOperand(4);
|
||||
SDValue Temp;
|
||||
|
||||
if (VT == MVT::f32) {
|
||||
DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
|
||||
SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
|
||||
if (MinMax)
|
||||
return MinMax;
|
||||
}
|
||||
|
||||
// LHS and RHS are guaranteed to be the same value type
|
||||
EVT CompareVT = LHS.getValueType();
|
||||
|
||||
|
@ -1514,6 +1514,7 @@ let isCommutable = 1 in {
|
||||
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
|
||||
VOP_F32_F32_F32
|
||||
>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmin_legacy
|
||||
@ -1522,6 +1523,7 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmax_legacy
|
||||
>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
|
||||
defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
|
||||
VOP_I32_I32_I32, sra
|
||||
|
67
test/CodeGen/R600/fmax_legacy.f64.ll
Normal file
67
test/CodeGen/R600/fmax_legacy.f64.ll
Normal file
@ -0,0 +1,67 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; Make sure we don't try to form FMAX_LEGACY nodes with f64
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; FUNC-LABEL: @test_fmax_legacy_uge_f64
|
||||
define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp uge double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_legacy_oge_f64
|
||||
define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp oge double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_legacy_ugt_f64
|
||||
define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp ugt double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_legacy_ogt_f64
|
||||
define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp ogt double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
|
||||
; FUNC-LABEL: @test_fmax_legacy_uge_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; EG: MAX
|
||||
define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
@ -44,7 +44,7 @@ define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmax_legacy_ugt_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; EG: MAX
|
||||
define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
77
test/CodeGen/R600/fmin_legacy.f64.ll
Normal file
77
test/CodeGen/R600/fmin_legacy.f64.ll
Normal file
@ -0,0 +1,77 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_f64
|
||||
define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x double> %reg0, i32 0
|
||||
%r1 = extractelement <4 x double> %reg0, i32 1
|
||||
%r2 = fcmp uge double %r0, %r1
|
||||
%r3 = select i1 %r2, double %r1, double %r0
|
||||
%vec = insertelement <4 x double> undef, double %r3, i32 0
|
||||
store <4 x double> %vec, <4 x double> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_ule_f64
|
||||
define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp ule double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_ole_f64
|
||||
define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp ole double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_olt_f64
|
||||
define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp olt double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_ult_f64
|
||||
define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load double addrspace(1)* %gep.0, align 8
|
||||
%b = load double addrspace(1)* %gep.1, align 8
|
||||
|
||||
%cmp = fcmp ult double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
@ -19,7 +19,7 @@ define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> in
|
||||
; FUNC-LABEL: @test_fmin_legacy_ule_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
@ -73,7 +73,7 @@ define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmin_legacy_ult_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
|
Loading…
x
Reference in New Issue
Block a user