mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
AMDGPU/GlobalISel: Add integer med3 combines
Add signed and unsigned integer versions of the med3 combine. The source pattern is min(max(Val, K0), K1) or max(min(Val, K1), K0), where K0 and K1 are constants and K0 <= K1. The destination is the med3 instruction that corresponds to the signedness of the min/max in the source. Differential Revision: https://reviews.llvm.org/D90050
This commit is contained in:
parent
e6701b4dfd
commit
495d2a275a
@ -45,6 +45,17 @@ def clamp_i64_to_i16 : GICombineRule<
|
||||
[{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
|
||||
(apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
|
||||
|
||||
def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">;
|
||||
|
||||
// Combine rule rooted at any of G_SMAX, G_SMIN, G_UMAX or G_UMIN.
// The match step calls RegBankHelper.matchIntMinMaxToMed3 on the root
// instruction and fills $matchinfo; the apply step passes the same root and
// match data to RegBankHelper.applyMed3.
def int_minmax_to_med3 : GICombineRule<
|
||||
(defs root:$min_or_max, med3_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SMAX,
|
||||
G_SMIN,
|
||||
G_UMAX,
|
||||
G_UMIN):$min_or_max,
|
||||
[{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
|
||||
(apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
|
||||
|
||||
// Combines which should only apply on SI/VI
|
||||
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
|
||||
|
||||
@ -64,6 +75,8 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
|
||||
}
|
||||
|
||||
def AMDGPURegBankCombinerHelper : GICombinerHelper<
|
||||
"AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold]> {
|
||||
"AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3]> {
|
||||
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
|
||||
let StateClass = "AMDGPURegBankCombinerHelperState";
|
||||
let AdditionalArguments = [];
|
||||
}
|
||||
|
@ -167,7 +167,8 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
|
||||
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
|
||||
|
||||
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
|
||||
def : GINodeEquiv<G_AMDGPU_MED3, AMDGPUsmed3>;
|
||||
def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
|
||||
def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
|
||||
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
|
||||
|
@ -143,7 +143,7 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
|
||||
auto Bitcast = B.buildBitcast({S32}, CvtPk);
|
||||
|
||||
auto Med3 = B.buildInstr(
|
||||
AMDGPU::G_AMDGPU_MED3, {S32},
|
||||
AMDGPU::G_AMDGPU_SMED3, {S32},
|
||||
{MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
|
||||
MI.getFlags());
|
||||
|
||||
|
@ -13,7 +13,9 @@
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPULegalizerInfo.h"
|
||||
#include "AMDGPURegisterBankInfo.h"
|
||||
#include "GCNSubtarget.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
||||
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
||||
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
|
||||
@ -27,6 +29,126 @@
|
||||
using namespace llvm;
|
||||
using namespace MIPatternMatch;
|
||||
|
||||
// Helper that implements the AMDGPU-specific match/apply routines for the
// integer min/max -> med3 combine. It keeps references to the builder and to
// the function-level objects derived from it; it owns none of them.
class AMDGPURegBankCombinerHelper {
|
||||
protected:
|
||||
// Builder used to emit the replacement instruction.
MachineIRBuilder &B;
|
||||
// Function the builder is attached to (B.getMF()).
MachineFunction &MF;
|
||||
// Register info obtained from the builder (*B.getMRI()).
MachineRegisterInfo &MRI;
|
||||
// Register bank info taken from MF's subtarget.
const RegisterBankInfo &RBI;
|
||||
// Target register info taken from MF's subtarget.
const TargetRegisterInfo &TRI;
|
||||
// Generic combiner helper; stored here but not used by the methods defined
// in this class.
CombinerHelper &Helper;
|
||||
|
||||
public:
|
||||
// All members other than B and Helper are derived from the builder, so the
// builder must already be attached to a MachineFunction.
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
|
||||
: B(B), MF(B.getMF()), MRI(*B.getMRI()),
|
||||
RBI(*MF.getSubtarget().getRegBankInfo()),
|
||||
TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
|
||||
|
||||
// True when Reg's register bank is the VGPR bank.
bool isVgprRegBank(Register Reg);
|
||||
|
||||
// Opcode triple: the min opcode, the max opcode, and the med3 opcode that
// replaces a matching min/max pair.
struct MinMaxMedOpc {
|
||||
unsigned Min, Max, Med;
|
||||
};
|
||||
|
||||
// Data handed from the match step to the apply step: the opcode to build and
// its three source registers, in operand order.
struct Med3MatchInfo {
|
||||
unsigned Opc;
|
||||
Register Val0, Val1, Val2;
|
||||
};
|
||||
|
||||
// Returns the opcode triple for a signed or unsigned min/max opcode.
MinMaxMedOpc getMinMaxPair(unsigned Opc);
|
||||
|
||||
// Matches min(max(Val, K0), K1) or max(min(Val, K1), K0) in any operand
// order; m_Cst is the matcher type used to bind K0 and K1.
template <class m_Cst>
|
||||
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
|
||||
Register &Val, Register &K0, Register &K1);
|
||||
|
||||
// Match and apply entry points for the integer min/max -> med3 combine.
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
|
||||
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
|
||||
};
|
||||
|
||||
// Returns true when the register bank of Reg has the VGPR bank ID.
// NOTE(review): the result of getRegBank() is dereferenced without a null
// check, so Reg is presumably expected to already have a bank assigned --
// confirm against callers.
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
|
||||
return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
|
||||
}
|
||||
|
||||
// Maps a min or max opcode to the {Min, Max, Med} opcode triple of the same
// signedness. G_SMAX/G_SMIN give the signed triple ending in
// G_AMDGPU_SMED3; G_UMAX/G_UMIN give the unsigned triple ending in
// G_AMDGPU_UMED3. Any other opcode hits llvm_unreachable.
AMDGPURegBankCombinerHelper::MinMaxMedOpc
|
||||
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
llvm_unreachable("Unsupported opcode");
|
||||
case AMDGPU::G_SMAX:
|
||||
case AMDGPU::G_SMIN:
|
||||
return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
|
||||
case AMDGPU::G_UMAX:
|
||||
case AMDGPU::G_UMIN:
|
||||
return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
|
||||
}
|
||||
}
|
||||
|
||||
// Structural match for a clamp written as a min/max pair.
//
// MI is the outer instruction and MMMOpc supplies the min and max opcodes to
// look for. On a successful match, Val is bound to the non-constant inner
// operand, K0 to the operand paired with the max, and K1 to the operand
// paired with the min. m_Cst is the matcher type used to bind K0 and K1.
// Returns the result of mi_match; this function does not compare K0 and K1.
template <class m_Cst>
|
||||
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MinMaxMedOpc MMMOpc, Register &Val,
|
||||
Register &K0, Register &K1) {
|
||||
// 4 operand commutes of: min(max(Val, K0), K1).
|
||||
// Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
|
||||
// Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
|
||||
// 4 operand commutes of: max(min(Val, K1), K0).
|
||||
// Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
|
||||
// Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
|
||||
return mi_match(
|
||||
MI, MRI,
|
||||
m_any_of(
|
||||
m_CommutativeBinOp(
|
||||
MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
|
||||
m_Cst(K1)),
|
||||
m_CommutativeBinOp(
|
||||
MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
|
||||
m_Cst(K0))));
|
||||
}
|
||||
|
||||
// Match step of the integer min/max -> med3 combine.
//
// Returns true and fills MatchInfo with {med3 opcode, Val, K0, K1} when MI is
// the outer instruction of min(max(Val, K0), K1) or max(min(Val, K1), K0),
// its result is a non-vector value in the VGPR bank, and K0 <= K1 under the
// signedness of the min/max opcodes. Returns false otherwise and leaves
// MatchInfo untouched.
bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
|
||||
MachineInstr &MI, Med3MatchInfo &MatchInfo) {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
// Only results in the VGPR bank are combined.
if (!isVgprRegBank(Dst))
|
||||
return false;
|
||||
|
||||
// Vector results are left as separate min/max instructions.
if (MRI.getType(Dst).isVector())
|
||||
return false;
|
||||
|
||||
MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
|
||||
Register Val, K0, K1;
|
||||
// Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
|
||||
if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
|
||||
return false;
|
||||
|
||||
// NOTE(review): the pointers returned by getConstantIntVRegVal are
// dereferenced unchecked; presumably a successful ICstRegMatch guarantees
// they are non-null -- confirm both helpers look through the same
// instructions.
const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
|
||||
const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
|
||||
// Reject K0 > K1, using the comparison that matches the med3 signedness.
if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
|
||||
return false;
|
||||
if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
|
||||
return false;
|
||||
|
||||
MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
|
||||
return true;
|
||||
}
|
||||
|
||||
// Apply step of the med3 combine. Positions the builder at MI (also taking
// its debug location), builds an instruction with opcode MatchInfo.Opc that
// writes MI's operand 0 from Val0, Val1 and Val2 while carrying over MI's
// flags, then erases MI.
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
|
||||
Med3MatchInfo &MatchInfo) {
|
||||
B.setInstrAndDebugLoc(MI);
|
||||
B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
|
||||
{MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
// State object that bundles the generic CombinerHelper with the AMDGPU
// reg-bank helper. Both are held by reference, so they must outlive this
// object.
// NOTE(review): presumably the TableGen-generated combiner derives from this
// class to reach `Helper` and `RegBankHelper` in rule code -- confirm against
// the generated .inc file.
class AMDGPURegBankCombinerHelperState {
|
||||
protected:
|
||||
CombinerHelper &Helper;
|
||||
AMDGPURegBankCombinerHelper &RegBankHelper;
|
||||
|
||||
public:
|
||||
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
|
||||
AMDGPURegBankCombinerHelper &RegBankHelper)
|
||||
: Helper(Helper), RegBankHelper(RegBankHelper) {}
|
||||
};
|
||||
|
||||
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
#include "AMDGPUGenRegBankGICombiner.inc"
|
||||
@ -62,9 +184,11 @@ bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
|
||||
MachineInstr &MI,
|
||||
MachineIRBuilder &B) const {
|
||||
CombinerHelper Helper(Observer, B, KB, MDT);
|
||||
AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg);
|
||||
AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
|
||||
AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
|
||||
RegBankHelper);
|
||||
|
||||
if (Generated.tryCombineAll(Observer, MI, B, Helper))
|
||||
if (Generated.tryCombineAll(Observer, MI, B))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -3507,7 +3507,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
|
||||
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
|
||||
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
|
||||
case AMDGPU::G_AMDGPU_MED3:
|
||||
case AMDGPU::G_AMDGPU_SMED3:
|
||||
return getDefaultMappingVOP(MI);
|
||||
case AMDGPU::G_UMULH:
|
||||
case AMDGPU::G_SMULH: {
|
||||
|
@ -2652,7 +2652,13 @@ def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction {
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_AMDGPU_MED3 : AMDGPUGenericInstruction {
|
||||
def G_AMDGPU_SMED3 : AMDGPUGenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
|
||||
let hasSideEffects = 0;
|
||||
|
328
test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
Normal file
328
test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
Normal file
@ -0,0 +1,328 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: test_min_max_ValK0_K1_i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMAX %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMIN %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
# smin(smax(K0, Val), K1): the inner G_SMAX has the constant as its first
# operand. Expected to fold to G_AMDGPU_SMED3 Val, K0, K1.
---
|
||||
name: test_min_max_K0Val_K1_i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_max_K0Val_K1_i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMAX %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMIN %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_min_K1max_ValK0__i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_K1max_ValK0__i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMAX %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMIN %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_min_K1max_K0Val__i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_K1max_K0Val__i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMAX %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMIN %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_min_ValK1_K0_i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_min_ValK1_K0_i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMIN %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMAX %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_min_K1Val_K0_i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_min_K1Val_K0_i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMIN %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMAX %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_K0min_ValK1__i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_K0min_ValK1__i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMIN %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMAX %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_K0min_K1Val__i32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_K0min_K1Val__i32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_SMIN %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_SMAX %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_K0min_K1Val__v2i16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
|
||||
; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
|
||||
; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
|
||||
; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%9:sgpr(s32) = G_CONSTANT i32 17
|
||||
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
|
||||
%10:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
|
||||
%11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
|
||||
%4:vgpr(<2 x s16>) = G_SMIN %11, %0
|
||||
%12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
|
||||
%7:vgpr(<2 x s16>) = G_SMAX %12, %4
|
||||
$vgpr0 = COPY %7(<2 x s16>)
|
||||
%8:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %8, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_uniform_min_max
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $sgpr2
|
||||
|
||||
; CHECK-LABEL: name: test_uniform_min_max
|
||||
; CHECK: liveins: $sgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
|
||||
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%0:sgpr(s32) = COPY $sgpr2
|
||||
%3:sgpr(s32) = G_CONSTANT i32 -12
|
||||
%4:sgpr(s32) = G_SMAX %0, %3
|
||||
%5:sgpr(s32) = G_CONSTANT i32 17
|
||||
%6:sgpr(s32) = G_SMIN %4, %5
|
||||
%8:vgpr(s32) = COPY %6(s32)
|
||||
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
|
||||
$sgpr0 = COPY %7(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
...
|
329
test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
Normal file
329
test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
Normal file
@ -0,0 +1,329 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: test_min_max_ValK0_K1_u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMAX %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMIN %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
# umin(umax(K0, Val), K1): the inner G_UMAX has the constant as its first
# operand. Expected to fold to G_AMDGPU_UMED3 Val, K0, K1.
---
|
||||
name: test_min_max_K0Val_K1_u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_max_K0Val_K1_u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMAX %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMIN %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_min_K1max_ValK0__u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_K1max_ValK0__u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMAX %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMIN %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_min_K1max_K0Val__u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_min_K1max_K0Val__u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 12
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMAX %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 17
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMIN %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_min_ValK1_K0_u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMIN %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMAX %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_min_K1Val_K0_u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMIN %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMAX %3, %8
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
---
|
||||
name: test_max_K0min_ValK1__u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_K0min_ValK1__u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMIN %0, %7
|
||||
%4:sgpr(s32) = G_CONSTANT i32 12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMAX %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
# Input pattern: G_UMAX(K0, G_UMIN(K1, Val)) with K0 = 12 and K1 = 17 (constant
# first in both instructions), all on vgpr values. The check lines expect one
# G_AMDGPU_UMED3 of Val, 12, 17.
---
|
||||
name: test_max_K0min_K1Val__u32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_K0min_K1Val__u32
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
|
||||
; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%2:sgpr(s32) = G_CONSTANT i32 17
|
||||
%7:vgpr(s32) = COPY %2(s32)
|
||||
%3:vgpr(s32) = G_UMIN %7, %0
|
||||
%4:sgpr(s32) = G_CONSTANT i32 12
|
||||
%8:vgpr(s32) = COPY %4(s32)
|
||||
%5:vgpr(s32) = G_UMAX %8, %3
|
||||
$vgpr0 = COPY %5(s32)
|
||||
%6:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %6, implicit $vgpr0
|
||||
...
|
||||
|
||||
# <2 x s16> variant of G_UMAX(K0, G_UMIN(K1, Val)), with both constants built by
# G_BUILD_VECTOR_TRUNC. The check lines expect the G_UMIN / G_UMAX pair to be
# left in place (no med3 instruction appears in the expected output).
---
|
||||
name: test_max_K0min_K1Val__v2u16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $vgpr0, $sgpr30_sgpr31
|
||||
|
||||
; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
|
||||
; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
|
||||
; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
|
||||
; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
%9:sgpr(s32) = G_CONSTANT i32 17
|
||||
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
|
||||
%10:sgpr(s32) = G_CONSTANT i32 12
|
||||
%5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
|
||||
%11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
|
||||
%4:vgpr(<2 x s16>) = G_UMIN %11, %0
|
||||
%12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
|
||||
%7:vgpr(<2 x s16>) = G_UMAX %12, %4
|
||||
$vgpr0 = COPY %7(<2 x s16>)
|
||||
%8:ccr_sgpr_64 = COPY %1
|
||||
S_SETPC_B64_return %8, implicit $vgpr0
|
||||
...
|
||||
|
||||
# G_UMIN(G_UMAX(Val, 12), 17) where Val, both constants and both results are in
# the sgpr bank. The check lines expect the G_UMAX / G_UMIN pair to be left
# unchanged (no med3 instruction appears in the expected output).
---
|
||||
name: test_uniform_min_max
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $sgpr2
|
||||
|
||||
; CHECK-LABEL: name: test_uniform_min_max
|
||||
; CHECK: liveins: $sgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
|
||||
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%0:sgpr(s32) = COPY $sgpr2
|
||||
%3:sgpr(s32) = G_CONSTANT i32 12
|
||||
%4:sgpr(s32) = G_UMAX %0, %3
|
||||
%5:sgpr(s32) = G_CONSTANT i32 17
|
||||
%6:sgpr(s32) = G_UMIN %4, %5
|
||||
%8:vgpr(s32) = COPY %6(s32)
|
||||
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
|
||||
$sgpr0 = COPY %7(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
|
||||
...
|
127
test/CodeGen/AMDGPU/GlobalISel/smed3.ll
Normal file
127
test/CodeGen/AMDGPU/GlobalISel/smed3.ll
Normal file
@ -0,0 +1,127 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
; Signed clamp written as smin(smax(%a, -12), 17). The GFX10 checks expect a
; single v_med3_i32 with operands v0, -12, 17.
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
|
||||
; GFX10-LABEL: test_min_max_ValK0_K1_i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
|
||||
%smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written as smin(smax(-12, %a), 17): the constant is the FIRST
; smax operand here. The GFX10 checks expect a single v_med3_i32 v0, -12, 17.
; NOTE(review): the name says "ValK0" and lacks the "test_" prefix used by the
; other functions visible in this file; it looks like it was meant to be
; test_min_max_K0Val_K1_i32. Confirm before renaming, because the label check
; line below has to change together with the function name.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
|
||||
; GFX10-LABEL: min_max_ValK0_K1_i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
|
||||
%smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written as smin(17, smax(%a, -12)), i.e. the upper bound is the
; first smin operand. The GFX10 checks expect a single v_med3_i32 v0, -12, 17.
define i32 @test_min_K1max_ValK0__i32(i32 %a) {
|
||||
; GFX10-LABEL: test_min_K1max_ValK0__i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
|
||||
%smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written as smin(17, smax(-12, %a)), with the constant first in
; both calls. The GFX10 checks expect a single v_med3_i32 v0, -12, 17.
define i32 @test_min_K1max_K0Val__i32(i32 %a) {
|
||||
; GFX10-LABEL: test_min_K1max_K0Val__i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
|
||||
%smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written max-outermost: smax(smin(%a, 17), -12). The GFX10 checks
; expect a single v_med3_i32 v0, -12, 17.
define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_min_ValK1_K0_i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
|
||||
%smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written as smax(smin(17, %a), -12), with the constant first in
; the inner smin. The GFX10 checks expect a single v_med3_i32 v0, -12, 17.
define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_min_K1Val_K0_i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
|
||||
%smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written as smax(-12, smin(%a, 17)), with the lower bound as the
; first smax operand. The GFX10 checks expect a single v_med3_i32 v0, -12, 17.
define i32 @test_max_K0min_ValK1__i32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_K0min_ValK1__i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
|
||||
%smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Signed clamp written as smax(-12, smin(17, %a)), with the constant first in
; both calls. The GFX10 checks expect a single v_med3_i32 v0, -12, 17.
define i32 @test_max_K0min_K1Val__i32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_K0min_K1Val__i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
|
||||
%smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; <2 x i16> variant of smax(-12, smin(17, %a)) using splat constants. The GFX10
; checks expect separate v_pk_min_i16 and v_pk_max_i16 instructions; no med3
; instruction appears in the expected output.
define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
|
||||
; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
|
||||
; GFX10-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%smin = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
|
||||
%smed = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 -12, i16 -12>, <2 x i16> %smin)
|
||||
ret <2 x i16> %smed
|
||||
}
|
||||
|
||||
; amdgpu_ps function taking %a as an inreg argument and computing
; smin(smax(%a, -12), 17). The GFX10 checks expect scalar s_max_i32 followed by
; s_min_i32; no med3 instruction appears in the expected output.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
|
||||
; GFX10-LABEL: test_uniform_min_max:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_max_i32 s0, s2, -12
|
||||
; GFX10-NEXT: s_min_i32 s0, s0, 17
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
|
||||
%smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
|
||||
ret i32 %smed
|
||||
}
|
||||
|
||||
; Declarations of the signed min/max intrinsics (scalar i32 and <2 x i16>)
; that the test functions in this file call.
declare i32 @llvm.smin.i32(i32, i32)
|
||||
declare i32 @llvm.smax.i32(i32, i32)
|
||||
declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
|
||||
declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
|
127
test/CodeGen/AMDGPU/GlobalISel/umed3.ll
Normal file
127
test/CodeGen/AMDGPU/GlobalISel/umed3.ll
Normal file
@ -0,0 +1,127 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
; Unsigned clamp written as umin(umax(%a, 12), 17). The GFX10 checks expect a
; single v_med3_u32 with operands v0, 12, 17.
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
|
||||
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
|
||||
%umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written as umin(umax(12, %a), 17): the constant is the FIRST
; umax operand here. The GFX10 checks expect a single v_med3_u32 v0, 12, 17.
; NOTE(review): the name says "ValK0", ends in "_i32" although this function
; calls the unsigned intrinsics, and lacks the "test_" prefix used by the other
; functions visible in this file; it looks like it was meant to be
; test_min_max_K0Val_K1_u32. Confirm before renaming, because the label check
; line below has to change together with the function name.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
|
||||
; GFX10-LABEL: min_max_ValK0_K1_i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
|
||||
%umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written as umin(17, umax(%a, 12)), i.e. the upper bound is the
; first umin operand. The GFX10 checks expect a single v_med3_u32 v0, 12, 17.
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
|
||||
; GFX10-LABEL: test_min_K1max_ValK0__u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
|
||||
%umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written as umin(17, umax(12, %a)), with the constant first in
; both calls. The GFX10 checks expect a single v_med3_u32 v0, 12, 17.
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
|
||||
; GFX10-LABEL: test_min_K1max_K0Val__u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
|
||||
%umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written max-outermost: umax(umin(%a, 17), 12). The GFX10
; checks expect a single v_med3_u32 v0, 12, 17.
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
|
||||
%umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written as umax(umin(17, %a), 12), with the constant first in
; the inner umin. The GFX10 checks expect a single v_med3_u32 v0, 12, 17.
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
|
||||
%umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written as umax(12, umin(%a, 17)), with the lower bound as the
; first umax operand. The GFX10 checks expect a single v_med3_u32 v0, 12, 17.
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_K0min_ValK1__u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
|
||||
%umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Unsigned clamp written as umax(12, umin(17, %a)), with the constant first in
; both calls. The GFX10 checks expect a single v_med3_u32 v0, 12, 17.
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
|
||||
; GFX10-LABEL: test_max_K0min_K1Val__u32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
|
||||
%umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; <2 x i16> variant of umax(12, umin(17, %a)) using splat constants. The GFX10
; checks expect separate v_pk_min_u16 and v_pk_max_u16 instructions; no med3
; instruction appears in the expected output.
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
|
||||
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
|
||||
; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
|
||||
%umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
|
||||
ret <2 x i16> %umed
|
||||
}
|
||||
|
||||
; amdgpu_ps function taking %a as an inreg argument and computing
; umin(umax(%a, 12), 17). The GFX10 checks expect scalar s_max_u32 followed by
; s_min_u32; no med3 instruction appears in the expected output.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
|
||||
; GFX10-LABEL: test_uniform_min_max:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_max_u32 s0, s2, 12
|
||||
; GFX10-NEXT: s_min_u32 s0, s0, 17
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
|
||||
%umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
|
||||
ret i32 %umed
|
||||
}
|
||||
|
||||
; Declarations of the unsigned min/max intrinsics (scalar i32 and <2 x i16>)
; that the test functions in this file call.
declare i32 @llvm.umin.i32(i32, i32)
|
||||
declare i32 @llvm.umax.i32(i32, i32)
|
||||
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
|
||||
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
|
Loading…
Reference in New Issue
Block a user