1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

AMDGPU/GlobalISel: Add integer med3 combines

Add signed and unsigned integer versions of the med3 combine.
The source pattern is min(max(Val, K0), K1) or max(min(Val, K1), K0),
where K0 and K1 are constants and K0 <= K1. The destination is the med3
whose signedness matches that of the min/max in the source.

Differential Revision: https://reviews.llvm.org/D90050
This commit is contained in:
Petar Avramovic 2021-04-27 11:51:22 +02:00
parent e6701b4dfd
commit 495d2a275a
10 changed files with 1062 additions and 7 deletions

View File

@ -45,6 +45,17 @@ def clamp_i64_to_i16 : GICombineRule<
[{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
(apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
// Match data handed from the matcher to the applier of the med3 combine. It is
// the C++ struct AMDGPURegBankCombinerHelper::Med3MatchInfo.
def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">;
// Integer min/max -> med3 combine. The rule is rooted at any of the four
// integer min/max opcodes; RegBankHelper.matchIntMinMaxToMed3 decides whether
// the root really matches and fills in the match data, and
// RegBankHelper.applyMed3 performs the rewrite.
def int_minmax_to_med3 : GICombineRule<
(defs root:$min_or_max, med3_matchdata:$matchinfo),
(match (wip_match_opcode G_SMAX,
G_SMIN,
G_UMAX,
G_UMIN):$min_or_max,
[{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
(apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
@ -64,6 +75,8 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
"AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold]> {
"AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
let AdditionalArguments = [];
}

View File

@ -167,7 +167,8 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
def : GINodeEquiv<G_AMDGPU_MED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;

View File

@ -143,7 +143,7 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
auto Bitcast = B.buildBitcast({S32}, CvtPk);
auto Med3 = B.buildInstr(
AMDGPU::G_AMDGPU_MED3, {S32},
AMDGPU::G_AMDGPU_SMED3, {S32},
{MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
MI.getFlags());

View File

@ -13,7 +13,9 @@
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@ -27,6 +29,126 @@
using namespace llvm;
using namespace MIPatternMatch;
/// Hand-written match/apply helpers for the AMDGPU register-bank combiner.
/// The combine rules reach an instance of this class under the name
/// `RegBankHelper`.
class AMDGPURegBankCombinerHelper {
protected:
  // Declaration order matters here: MF is read while RBI and TRI are being
  // initialized, so it has to be declared (and thus initialized) before them.
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  const RegisterBankInfo &RBI;
  const TargetRegisterInfo &TRI;
  CombinerHelper &Helper;

public:
  /// Binds the helper to the builder \p B (and the function, register info,
  /// register-bank info and target register info reachable from it) and to
  /// the generic combiner helper \p Helper.
  AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()),
        RBI(*MF.getSubtarget().getRegBankInfo()),
        TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper) {}

  /// Returns true if \p Reg is assigned to the VGPR register bank.
  bool isVgprRegBank(Register Reg);

  /// The min opcode, the max opcode and the med3 opcode of one signedness.
  struct MinMaxMedOpc {
    unsigned Min, Max, Med;
  };

  /// Result of a successful med3 match: the opcode to build and its three
  /// source registers, in operand order.
  struct Med3MatchInfo {
    unsigned Opc;
    Register Val0, Val1, Val2;
  };

  /// Maps an integer min/max opcode to the opcode triple of its signedness.
  MinMaxMedOpc getMinMaxPair(unsigned Opc);

  /// Matches min(max(Val, K0), K1) or max(min(Val, K1), K0) rooted at \p MI,
  /// where the constants are recognized by the matcher \p m_Cst.
  template <class m_Cst>
  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
                Register &Val, Register &K0, Register &K1);

  /// Matcher for the int_minmax_to_med3 rule; fills \p MatchInfo on success.
  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);

  /// Applier shared by the med3 rules: replaces \p MI with the med3
  /// instruction described by \p MatchInfo.
  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
};
/// Returns true when the register bank looked up for \p Reg is the VGPR bank.
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
  const auto *Bank = RBI.getRegBank(Reg, MRI, TRI);
  return Bank->getID() == AMDGPU::VGPRRegBankID;
}
/// Returns the {min, max, med3} opcode triple that shares the signedness of
/// the integer min/max opcode \p Opc. Any other opcode is a programming error.
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
  switch (Opc) {
  // Signed family.
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
  // Unsigned family.
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX:
    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
  default:
    llvm_unreachable("Unsupported opcode");
  }
}
/// Tries to recognize a clamp of Val between the constants K0 and K1 rooted
/// at \p MI, in either nesting order and with either operand order on both
/// the inner and the outer instruction (eight forms in total). On success
/// \p Val, \p K0 and \p K1 receive the matched registers.
template <class m_Cst>
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MinMaxMedOpc MMMOpc, Register &Val,
                                           Register &K0, Register &K1) {
  // First shape: min(max(Val, K0), K1).
  // K1 comes from the outer min, K0 and Val from the inner max; both
  // instructions are matched commutatively.
  if (mi_match(MI, MRI,
               m_CommutativeBinOp(
                   MMMOpc.Min,
                   m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
                   m_Cst(K1))))
    return true;

  // Second shape: max(min(Val, K1), K0).
  // K0 comes from the outer max, K1 and Val from the inner min; again both
  // instructions are matched commutatively.
  return mi_match(MI, MRI,
                  m_CommutativeBinOp(
                      MMMOpc.Max,
                      m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
                      m_Cst(K0)));
}
/// Matcher for the integer min/max -> med3 combine.
///
/// \p MI is the root G_SMIN/G_SMAX/G_UMIN/G_UMAX. The combine fires only when
///  * the result lives in the VGPR bank,
///  * the result type is one med3 can be built for (s32, or s16 when the
///    subtarget reports hasMed3_16()), and
///  * MI is min(max(Val, K0), K1) or max(min(Val, K1), K0) with constants
///    K0 <= K1, compared with the signedness of the min/max.
///
/// On success \p MatchInfo holds the med3 opcode and the operands
/// (Val, K0, K1), and true is returned.
bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  Register Dst = MI.getOperand(0).getReg();
  if (!isVgprRegBank(Dst))
    return false;

  // Only form med3 for types it can be built for: s32 always, s16 only when
  // the subtarget has 16-bit med3. Vectors and every other width are rejected
  // here too (previously only vectors were).
  const LLT Ty = MRI.getType(Dst);
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const bool IsS32 = Ty == LLT::scalar(32);
  const bool IsS16WithMed3 = Ty == LLT::scalar(16) && ST.hasMed3_16();
  if (!IsS32 && !IsS16WithMed3)
    return false;

  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val, K0, K1;
  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
  if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
  const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
  // The bounds must be ordered under the comparison the min/max itself uses;
  // otherwise the min/max pair is not a median of three.
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
    return false;
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
    return false;

  MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
  return true;
}
/// Replaces \p MI with the three-operand instruction described by
/// \p MatchInfo. The new instruction defines MI's result register and carries
/// MI's flags; MI itself is erased.
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                            Med3MatchInfo &MatchInfo) {
  // Build at MI's position, using MI's debug location.
  B.setInstrAndDebugLoc(MI);

  const Register Dst = MI.getOperand(0).getReg();
  B.buildInstr(MatchInfo.Opc, {Dst},
               {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2},
               MI.getFlags());

  MI.eraseFromParent();
}
/// State class of the generated register-bank combiner helper. It only holds
/// references to the two helper objects; the combine rules refer to them by
/// the member names `Helper` and `RegBankHelper`.
class AMDGPURegBankCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPURegBankCombinerHelper &RegBankHelper;

public:
  AMDGPURegBankCombinerHelperState(CombinerHelper &CH,
                                   AMDGPURegBankCombinerHelper &RBH)
      : Helper(CH), RegBankHelper(RBH) {}
};
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenRegBankGICombiner.inc"
@ -62,9 +184,11 @@ bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg);
AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
RegBankHelper);
if (Generated.tryCombineAll(Observer, MI, B, Helper))
if (Generated.tryCombineAll(Observer, MI, B))
return true;
return false;

View File

@ -3507,7 +3507,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
case AMDGPU::G_AMDGPU_MED3:
case AMDGPU::G_AMDGPU_SMED3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {

View File

@ -2652,7 +2652,13 @@ def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
def G_AMDGPU_MED3 : AMDGPUGenericInstruction {
def G_AMDGPU_SMED3 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
let hasSideEffects = 0;
}
def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
let hasSideEffects = 0;

View File

@ -0,0 +1,328 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
name: test_min_max_ValK0_K1_i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %3, %8
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
# smin(smax(K0, Val), K1): inner operands commuted relative to
# test_min_max_ValK0_K1_i32.
name: test_min_max_K0Val_K1_i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $vgpr0, $sgpr30_sgpr31
    ; CHECK-LABEL: name: test_min_max_K0Val_K1_i32
    ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
    %0:vgpr(s32) = COPY $vgpr0
    %1:sgpr_64 = COPY $sgpr30_sgpr31
    %2:sgpr(s32) = G_CONSTANT i32 -12
    %7:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_SMAX %7, %0
    %4:sgpr(s32) = G_CONSTANT i32 17
    %8:vgpr(s32) = COPY %4(s32)
    %5:vgpr(s32) = G_SMIN %3, %8
    $vgpr0 = COPY %5(s32)
    %6:ccr_sgpr_64 = COPY %1
    S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_min_K1max_ValK0__i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_ValK0__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_min_K1max_K0Val__i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_K0Val__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %7, %0
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_min_ValK1_K0_i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_ValK1_K0_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %3, %8
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_min_K1Val_K0_i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_K1Val_K0_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %3, %8
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_K0min_ValK1__i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_ValK1__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_K0min_K1Val__i32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_K0min_K1Val__v2i16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%9:sgpr(s32) = G_CONSTANT i32 17
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
%10:sgpr(s32) = G_CONSTANT i32 -12
%5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
%11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
%4:vgpr(<2 x s16>) = G_SMIN %11, %0
%12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
%7:vgpr(<2 x s16>) = G_SMAX %12, %4
$vgpr0 = COPY %7(<2 x s16>)
%8:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %8, implicit $vgpr0
...
---
name: test_uniform_min_max
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $sgpr2
; CHECK-LABEL: name: test_uniform_min_max
; CHECK: liveins: $sgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK: $sgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
%3:sgpr(s32) = G_CONSTANT i32 -12
%4:sgpr(s32) = G_SMAX %0, %3
%5:sgpr(s32) = G_CONSTANT i32 17
%6:sgpr(s32) = G_SMIN %4, %5
%8:vgpr(s32) = COPY %6(s32)
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
...

View File

@ -0,0 +1,329 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
name: test_min_max_ValK0_K1_u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %3, %8
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
# umin(umax(K0, Val), K1): inner operands commuted relative to
# test_min_max_ValK0_K1_u32.
name: test_min_max_K0Val_K1_u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $vgpr0, $sgpr30_sgpr31
    ; CHECK-LABEL: name: test_min_max_K0Val_K1_u32
    ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
    %0:vgpr(s32) = COPY $vgpr0
    %1:sgpr_64 = COPY $sgpr30_sgpr31
    %2:sgpr(s32) = G_CONSTANT i32 12
    %7:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_UMAX %7, %0
    %4:sgpr(s32) = G_CONSTANT i32 17
    %8:vgpr(s32) = COPY %4(s32)
    %5:vgpr(s32) = G_UMIN %3, %8
    $vgpr0 = COPY %5(s32)
    %6:ccr_sgpr_64 = COPY %1
    S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_min_K1max_ValK0__u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_ValK0__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_min_K1max_K0Val__u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_K0Val__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %7, %0
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_min_ValK1_K0_u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %3, %8
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_min_K1Val_K0_u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %3, %8
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_K0min_ValK1__u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_ValK1__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_K0min_K1Val__u32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %8, %3
$vgpr0 = COPY %5(s32)
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
---
name: test_max_K0min_K1Val__v2u16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%9:sgpr(s32) = G_CONSTANT i32 17
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
%10:sgpr(s32) = G_CONSTANT i32 12
%5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
%11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
%4:vgpr(<2 x s16>) = G_UMIN %11, %0
%12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
%7:vgpr(<2 x s16>) = G_UMAX %12, %4
$vgpr0 = COPY %7(<2 x s16>)
%8:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %8, implicit $vgpr0
...
---
name: test_uniform_min_max
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $sgpr2
; CHECK-LABEL: name: test_uniform_min_max
; CHECK: liveins: $sgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK: $sgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
%3:sgpr(s32) = G_CONSTANT i32 12
%4:sgpr(s32) = G_UMAX %0, %3
%5:sgpr(s32) = G_CONSTANT i32 17
%6:sgpr(s32) = G_UMIN %4, %5
%8:vgpr(s32) = COPY %6(s32)
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
...

View File

@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; Signed pattern smin(smax(Val, K0), K1) with K0 = -12 and K1 = 17.
; The checks expect a single v_med3_i32 with operands (Val, K0, K1).
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
; GFX10-LABEL: test_min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
%smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
ret i32 %smed
}
; Signed pattern smin(smax(K0, Val), K1): the constant K0 = -12 is the FIRST
; operand of the inner smax. The checks expect v_med3_i32 v0, v0, -12, 17.
; NOTE(review): the name lacks the "test_" prefix used by the sibling tests and
; says "ValK0" although the operand order here is (K0, Val); a name following
; the siblings' scheme would be test_min_max_K0Val_K1_i32 - confirm and rename.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
%smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
ret i32 %smed
}
; Signed pattern smin(K1, smax(Val, K0)): the outer constant K1 = 17 is the
; first operand of smin. The checks expect v_med3_i32 v0, v0, -12, 17.
define i32 @test_min_K1max_ValK0__i32(i32 %a) {
; GFX10-LABEL: test_min_K1max_ValK0__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
%smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
ret i32 %smed
}
; Signed pattern smin(K1, smax(K0, Val)): both constants (K1 = 17, K0 = -12)
; are the first operand of their call. The checks expect
; v_med3_i32 v0, v0, -12, 17.
define i32 @test_min_K1max_K0Val__i32(i32 %a) {
; GFX10-LABEL: test_min_K1max_K0Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
%smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
ret i32 %smed
}
; Signed pattern smax(smin(Val, K1), K0) with K1 = 17 and K0 = -12.
; The checks expect v_med3_i32 v0, v0, -12, 17 (K0 before K1 in the output).
define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
; GFX10-LABEL: test_max_min_ValK1_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
%smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
ret i32 %smed
}
; Signed pattern smax(smin(K1, Val), K0): the constant K1 = 17 is the first
; operand of the inner smin. The checks expect v_med3_i32 v0, v0, -12, 17.
define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
; GFX10-LABEL: test_max_min_K1Val_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
%smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
ret i32 %smed
}
; Signed pattern smax(K0, smin(Val, K1)): the outer constant K0 = -12 is the
; first operand of smax. The checks expect v_med3_i32 v0, v0, -12, 17.
define i32 @test_max_K0min_ValK1__i32(i32 %a) {
; GFX10-LABEL: test_max_K0min_ValK1__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
%smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
ret i32 %smed
}
; Signed pattern smax(K0, smin(K1, Val)): both constants (K0 = -12, K1 = 17)
; are the first operand of their call. The checks expect
; v_med3_i32 v0, v0, -12, 17.
define i32 @test_max_K0min_K1Val__i32(i32 %a) {
; GFX10-LABEL: test_max_K0min_K1Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
%smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
ret i32 %smed
}
; Vector (<2 x i16>) variant of smax(K0, smin(K1, Val)) using splat constants.
; Negative test: the checks expect the packed v_pk_min_i16 / v_pk_max_i16 pair
; to remain, i.e. no med3 instruction is produced for this type.
; NOTE(review): presumably the combine only handles scalar 32-bit values - confirm.
define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%smin = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
%smed = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 -12, i16 -12>, <2 x i16> %smin)
ret <2 x i16> %smed
}
; Uniform variant: an amdgpu_ps shader whose argument is passed inreg, with the
; same smin(smax(Val, -12), 17) pattern as the first test.
; Negative test: the checks expect scalar s_max_i32 / s_min_i32 to remain
; rather than a med3 instruction.
; NOTE(review): presumably because med3 has no scalar (SALU) form - confirm.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_i32 s0, s2, -12
; GFX10-NEXT: s_min_i32 s0, s0, 17
; GFX10-NEXT: ; return to shader part epilog
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
%smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
ret i32 %smed
}
; Declarations of the signed min/max intrinsics called by the tests above,
; in their scalar i32 and <2 x i16> vector forms.
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)

View File

@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; Unsigned pattern umin(umax(Val, K0), K1) with K0 = 12 and K1 = 17.
; The checks expect a single v_med3_u32 with operands (Val, K0, K1).
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
%umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
ret i32 %umed
}
; Unsigned pattern umin(umax(K0, Val), K1): the constant K0 = 12 is the FIRST
; operand of the inner umax. The checks expect v_med3_u32 v0, v0, 12, 17.
; NOTE(review): the name looks copied from the signed test file - it lacks the
; "test_" prefix, says "ValK0" although the operand order is (K0, Val), and
; ends in "_i32" while every sibling here uses "_u32". A name following the
; siblings' scheme would be test_min_max_K0Val_K1_u32 - confirm and rename.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
%umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
ret i32 %umed
}
; Unsigned pattern umin(K1, umax(Val, K0)): the outer constant K1 = 17 is the
; first operand of umin. The checks expect v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
%umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
ret i32 %umed
}
; Unsigned pattern umin(K1, umax(K0, Val)): both constants (K1 = 17, K0 = 12)
; are the first operand of their call. The checks expect
; v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
%umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
ret i32 %umed
}
; Unsigned pattern umax(umin(Val, K1), K0) with K1 = 17 and K0 = 12.
; The checks expect v_med3_u32 v0, v0, 12, 17 (K0 before K1 in the output).
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
%umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
ret i32 %umed
}
; Unsigned pattern umax(umin(K1, Val), K0): the constant K1 = 17 is the first
; operand of the inner umin. The checks expect v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
%umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
ret i32 %umed
}
; Unsigned pattern umax(K0, umin(Val, K1)): the outer constant K0 = 12 is the
; first operand of umax. The checks expect v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
%umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
ret i32 %umed
}
; Unsigned pattern umax(K0, umin(K1, Val)): both constants (K0 = 12, K1 = 17)
; are the first operand of their call. The checks expect
; v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
%umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
ret i32 %umed
}
; Vector (<2 x i16>) variant of umax(K0, umin(K1, Val)) using splat constants.
; Negative test: the checks expect the packed v_pk_min_u16 / v_pk_max_u16 pair
; to remain, i.e. no med3 instruction is produced for this type.
; NOTE(review): presumably the combine only handles scalar 32-bit values - confirm.
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
%umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
ret <2 x i16> %umed
}
; Uniform variant: an amdgpu_ps shader whose argument is passed inreg, with the
; same umin(umax(Val, 12), 17) pattern as the first test.
; Negative test: the checks expect scalar s_max_u32 / s_min_u32 to remain
; rather than a med3 instruction.
; NOTE(review): presumably because med3 has no scalar (SALU) form - confirm.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_u32 s0, s2, 12
; GFX10-NEXT: s_min_u32 s0, s0, 17
; GFX10-NEXT: ; return to shader part epilog
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
%umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
ret i32 %umed
}
; Declarations of the unsigned min/max intrinsics called by the tests above,
; in their scalar i32 and <2 x i16> vector forms.
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)