[AMDGPU][GlobalISel] Use scalar min/max instructions

SALU min/max s32 instructions exist, so use them. This means that regbankselect can handle min/max much like add/sub/mul/shifts.

Differential Revision: https://reviews.llvm.org/D96047

parent 425d60e18c
commit 414015c7e8
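
In effect, regbankselect now keeps a uniform integer min/max as a single generic instruction on the SGPR bank instead of expanding it to a compare plus select. A minimal before/after sketch for the s32 case, distilled from the regbankselect test updates in this commit (virtual register numbering is illustrative):

    %0:_(s32) = COPY $sgpr0
    %1:_(s32) = COPY $sgpr1
    %2:_(s32) = G_SMAX %0, %1

    ; Before: the uniform case was expanded during regbankselect
    %3:sgpr(s32) = G_ICMP intpred(sgt), %0(s32), %1
    %2:sgpr(s32) = G_SELECT %3(s32), %0, %1

    ; After: only the register banks are assigned, and the operation
    ; survives to instruction selection as a SALU min/max (e.g. s_max_i32)
    %2:sgpr(s32) = G_SMAX %0, %1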
@@ -591,21 +591,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    return AltMappings;
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    static const OpRegBankEntry<3> Table[2] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Scalar requires cmp+select, and extends if 16-bit.
      // FIXME: Should there be separate costs for 32 and 16-bit
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
@@ -1576,23 +1561,8 @@ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic(
  return true;
}

// FIXME: Duplicated from LegalizerHelper
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}

static unsigned minMaxToExtend(unsigned Opc) {
// Return a suitable opcode for extending the operands of Opc when widening.
static unsigned getExtendOp(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
@@ -1601,7 +1571,7 @@ static unsigned minMaxToExtend(unsigned Opc) {
  case TargetOpcode::G_UMAX:
    return TargetOpcode::G_ZEXT;
  default:
    llvm_unreachable("not in integer min/max");
    return TargetOpcode::G_ANYEXT;
  }
}

@@ -1628,30 +1598,6 @@ unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
  return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0));
}

static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B,
                                               CmpInst::Predicate Pred,
                                               Register Dst, Register Src0,
                                               Register Src1) {
  const LLT CmpType = LLT::scalar(32);
  auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
  return B.buildSelect(Dst, Cmp, Src0, Src1);
}

// FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
                                               MachineInstr &MI) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1);

  Register CmpReg = Sel->getOperand(1).getReg();
  B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank);
  MI.eraseFromParent();
}

// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static bool substituteSimpleCopyRegs(
@@ -2341,7 +2287,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
  case AMDGPU::G_MUL:
  case AMDGPU::G_SHL:
  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR: {
  case AMDGPU::G_ASHR:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

@@ -2365,10 +2315,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
    Register WideSrc0Lo, WideSrc0Hi;
    Register WideSrc1Lo, WideSrc1Hi;

    unsigned ExtendOp = getExtendOp(MI.getOpcode());
    std::tie(WideSrc0Lo, WideSrc0Hi)
      = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), AMDGPU::G_ANYEXT);
      = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), ExtendOp);
    std::tie(WideSrc1Lo, WideSrc1Hi)
      = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), AMDGPU::G_ANYEXT);
      = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), ExtendOp);
    auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
    auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
    B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
@@ -2390,73 +2341,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(

    return;
  }
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);

    // Turn scalar min/max into a compare and select.
    LLT Ty = MRI.getType(DstReg);
    const LLT S32 = LLT::scalar(32);
    const LLT S16 = LLT::scalar(16);
    const LLT V2S16 = LLT::vector(2, 16);

    if (Ty == V2S16) {
      ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
      B.setChangeObserver(ApplySALU);

      // Need to widen to s32, and expand as cmp + select, and avoid producing
      // illegal vector extends or unmerges that would need further
      // legalization.
      //
      // TODO: Should we just readfirstlane? That should probably be handled
      // with a UniformVGPR register bank that wouldn't need special
      // consideration here.

      Register Dst = MI.getOperand(0).getReg();
      Register Src0 = MI.getOperand(1).getReg();
      Register Src1 = MI.getOperand(2).getReg();

      Register WideSrc0Lo, WideSrc0Hi;
      Register WideSrc1Lo, WideSrc1Hi;

      unsigned ExtendOp = minMaxToExtend(MI.getOpcode());

      std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp);
      std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp);

      Register Lo = MRI.createGenericVirtualRegister(S32);
      Register Hi = MRI.createGenericVirtualRegister(S32);
      const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
      buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
      buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);

      B.buildBuildVectorTrunc(Dst, {Lo, Hi});
      MI.eraseFromParent();
    } else if (Ty == S16) {
      ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
      B.setChangeObserver(ApplySALU);
      LegalizerHelper Helper(*MF, ApplySALU, B);

      // Need to widen to s32, and expand as cmp + select.
      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widenScalar should have succeeded");

      // FIXME: This is relying on widenScalar leaving MI in place.
      lowerScalarMinMax(B, MI);
    } else
      lowerScalarMinMax(B, MI);

    return;
  }
  case AMDGPU::G_SEXT_INREG: {
    SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
    if (SrcRegs.empty())
@@ -84,8 +84,6 @@ public:
  bool applyMappingBFEIntrinsic(const OperandsMapper &OpdMapper,
                                bool Signed) const;

  void lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const;

  Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          Register Reg) const;

@@ -13,8 +13,7 @@ body: |
; CHECK-LABEL: name: smax_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMAX %0, %1
@@ -90,9 +89,8 @@ body: |
; CHECK-LABEL: name: smax_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[SMAX]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMAX %0, %1
@@ -114,9 +112,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -144,9 +141,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -178,11 +174,9 @@ body: |
; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]]
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]]
; CHECK: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s

---
name: smin_s32_ss
@@ -13,9 +13,8 @@ body: |
; CHECK-LABEL: name: smin_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $sgpr0 = COPY [[SELECT]](s32)
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
; CHECK: $sgpr0 = COPY [[SMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMIN %0, %1
@@ -93,9 +92,8 @@ body: |
; CHECK-LABEL: name: smin_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[SMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMIN %0, %1
@@ -117,9 +115,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -147,9 +144,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -181,11 +177,9 @@ body: |
; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]]
; CHECK: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
@@ -13,9 +13,8 @@ body: |
; CHECK-LABEL: name: umax_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $sgpr0 = COPY [[SELECT]](s32)
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
; CHECK: $sgpr0 = COPY [[UMAX]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMAX %0, %1
@@ -93,9 +92,8 @@ body: |
; CHECK-LABEL: name: umax_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[UMAX]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMAX %0, %1
@@ -117,9 +115,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -147,9 +144,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -183,11 +179,9 @@ body: |
; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]]
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]]
; CHECK: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
@@ -13,9 +13,8 @@ body: |
; CHECK-LABEL: name: umin_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $sgpr0 = COPY [[SELECT]](s32)
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]]
; CHECK: $sgpr0 = COPY [[UMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMIN %0, %1
@@ -97,9 +96,8 @@ body: |
; CHECK-LABEL: name: umin_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]]
; CHECK: $vgpr0 = COPY [[UMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMIN %0, %1
@@ -121,9 +119,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[ZEXT]](s32), [[ZEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -151,9 +148,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[ZEXT]](s32), [[ZEXT1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -187,11 +183,9 @@ body: |
; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[LSHR]](s32), [[LSHR1]]
; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[AND]], [[AND1]]
; CHECK: [[UMIN1:%[0-9]+]]:sgpr(s32) = G_UMIN [[LSHR]], [[LSHR1]]
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](s32), [[UMIN1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -53,8 +53,7 @@ define amdgpu_ps i7 @s_uaddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
; GFX6-NEXT: s_not_b32 s2, s0
; GFX6-NEXT: s_cmp_lt_u32 s2, s1
; GFX6-NEXT: s_cselect_b32 s1, s2, s1
; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 25
; GFX6-NEXT: ; return to shader part epilog
@@ -143,8 +142,7 @@ define amdgpu_ps i8 @s_uaddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_not_b32 s2, s0
; GFX6-NEXT: s_cmp_lt_u32 s2, s1
; GFX6-NEXT: s_cselect_b32 s1, s2, s1
; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: ; return to shader part epilog
@@ -272,17 +270,15 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_not_b32 s4, s0
; GFX6-NEXT: s_cmp_lt_u32 s4, s1
; GFX6-NEXT: s_cselect_b32 s1, s4, s1
; GFX6-NEXT: s_min_u32 s1, s4, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_not_b32 s3, s1
; GFX6-NEXT: s_cmp_lt_u32 s3, s2
; GFX6-NEXT: s_cselect_b32 s2, s3, s2
; GFX6-NEXT: s_min_u32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -521,31 +517,27 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_not_b32 s8, s0
; GFX6-NEXT: s_cmp_lt_u32 s8, s1
; GFX6-NEXT: s_cselect_b32 s1, s8, s1
; GFX6-NEXT: s_min_u32 s1, s8, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_not_b32 s5, s1
; GFX6-NEXT: s_cmp_lt_u32 s5, s2
; GFX6-NEXT: s_cselect_b32 s2, s5, s2
; GFX6-NEXT: s_min_u32 s2, s5, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
; GFX6-NEXT: s_not_b32 s5, s2
; GFX6-NEXT: s_cmp_lt_u32 s5, s3
; GFX6-NEXT: s_cselect_b32 s3, s5, s3
; GFX6-NEXT: s_min_u32 s3, s5, s3
; GFX6-NEXT: s_add_i32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s4, 24
; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
; GFX6-NEXT: s_not_b32 s5, s3
; GFX6-NEXT: s_cmp_lt_u32 s5, s4
; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
@@ -736,8 +728,7 @@ define amdgpu_ps i24 @s_uaddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_not_b32 s2, s0
; GFX6-NEXT: s_cmp_lt_u32 s2, s1
; GFX6-NEXT: s_cselect_b32 s1, s2, s1
; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 8
; GFX6-NEXT: ; return to shader part epilog
@@ -809,8 +800,7 @@ define amdgpu_ps i32 @s_uaddsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_uaddsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s2, s0
; GFX6-NEXT: s_cmp_lt_u32 s2, s1
; GFX6-NEXT: s_cselect_b32 s1, s2, s1
; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
@@ -932,12 +922,10 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
; GFX6-LABEL: s_uaddsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s4, s0
; GFX6-NEXT: s_cmp_lt_u32 s4, s2
; GFX6-NEXT: s_cselect_b32 s2, s4, s2
; GFX6-NEXT: s_min_u32 s2, s4, s2
; GFX6-NEXT: s_add_i32 s0, s0, s2
; GFX6-NEXT: s_not_b32 s2, s1
; GFX6-NEXT: s_cmp_lt_u32 s2, s3
; GFX6-NEXT: s_cselect_b32 s2, s2, s3
; GFX6-NEXT: s_min_u32 s2, s2, s3
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1019,16 +1007,13 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
; GFX6-LABEL: s_uaddsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s6, s0
; GFX6-NEXT: s_cmp_lt_u32 s6, s3
; GFX6-NEXT: s_cselect_b32 s3, s6, s3
; GFX6-NEXT: s_min_u32 s3, s6, s3
; GFX6-NEXT: s_add_i32 s0, s0, s3
; GFX6-NEXT: s_not_b32 s3, s1
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s3, s3, s4
; GFX6-NEXT: s_min_u32 s3, s3, s4
; GFX6-NEXT: s_add_i32 s1, s1, s3
; GFX6-NEXT: s_not_b32 s3, s2
; GFX6-NEXT: s_cmp_lt_u32 s3, s5
; GFX6-NEXT: s_cselect_b32 s3, s3, s5
; GFX6-NEXT: s_min_u32 s3, s3, s5
; GFX6-NEXT: s_add_i32 s2, s2, s3
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1124,20 +1109,16 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
; GFX6-LABEL: s_uaddsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s8, s0
; GFX6-NEXT: s_cmp_lt_u32 s8, s4
; GFX6-NEXT: s_cselect_b32 s4, s8, s4
; GFX6-NEXT: s_min_u32 s4, s8, s4
; GFX6-NEXT: s_add_i32 s0, s0, s4
; GFX6-NEXT: s_not_b32 s4, s1
; GFX6-NEXT: s_cmp_lt_u32 s4, s5
; GFX6-NEXT: s_cselect_b32 s4, s4, s5
; GFX6-NEXT: s_min_u32 s4, s4, s5
; GFX6-NEXT: s_add_i32 s1, s1, s4
; GFX6-NEXT: s_not_b32 s4, s2
; GFX6-NEXT: s_cmp_lt_u32 s4, s6
; GFX6-NEXT: s_cselect_b32 s4, s4, s6
; GFX6-NEXT: s_min_u32 s4, s4, s6
; GFX6-NEXT: s_add_i32 s2, s2, s4
; GFX6-NEXT: s_not_b32 s4, s3
; GFX6-NEXT: s_cmp_lt_u32 s4, s7
; GFX6-NEXT: s_cselect_b32 s4, s4, s7
; GFX6-NEXT: s_min_u32 s4, s4, s7
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1247,24 +1228,19 @@ define amdgpu_ps <5 x i32> @s_uaddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
; GFX6-LABEL: s_uaddsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s10, s0
; GFX6-NEXT: s_cmp_lt_u32 s10, s5
; GFX6-NEXT: s_cselect_b32 s5, s10, s5
; GFX6-NEXT: s_min_u32 s5, s10, s5
; GFX6-NEXT: s_add_i32 s0, s0, s5
; GFX6-NEXT: s_not_b32 s5, s1
; GFX6-NEXT: s_cmp_lt_u32 s5, s6
; GFX6-NEXT: s_cselect_b32 s5, s5, s6
; GFX6-NEXT: s_min_u32 s5, s5, s6
; GFX6-NEXT: s_add_i32 s1, s1, s5
; GFX6-NEXT: s_not_b32 s5, s2
; GFX6-NEXT: s_cmp_lt_u32 s5, s7
; GFX6-NEXT: s_cselect_b32 s5, s5, s7
; GFX6-NEXT: s_min_u32 s5, s5, s7
; GFX6-NEXT: s_add_i32 s2, s2, s5
; GFX6-NEXT: s_not_b32 s5, s3
; GFX6-NEXT: s_cmp_lt_u32 s5, s8
; GFX6-NEXT: s_cselect_b32 s5, s5, s8
; GFX6-NEXT: s_min_u32 s5, s5, s8
; GFX6-NEXT: s_add_i32 s3, s3, s5
; GFX6-NEXT: s_not_b32 s5, s4
; GFX6-NEXT: s_cmp_lt_u32 s5, s9
; GFX6-NEXT: s_cselect_b32 s5, s5, s9
; GFX6-NEXT: s_min_u32 s5, s5, s9
; GFX6-NEXT: s_add_i32 s4, s4, s5
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1448,68 +1424,52 @@ define amdgpu_ps <16 x i32> @s_uaddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
; GFX6-LABEL: s_uaddsat_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s32, s0
; GFX6-NEXT: s_cmp_lt_u32 s32, s16
; GFX6-NEXT: s_cselect_b32 s16, s32, s16
; GFX6-NEXT: s_min_u32 s16, s32, s16
; GFX6-NEXT: s_add_i32 s0, s0, s16
; GFX6-NEXT: s_not_b32 s16, s1
; GFX6-NEXT: s_cmp_lt_u32 s16, s17
; GFX6-NEXT: s_cselect_b32 s16, s16, s17
; GFX6-NEXT: s_min_u32 s16, s16, s17
; GFX6-NEXT: s_add_i32 s1, s1, s16
; GFX6-NEXT: s_not_b32 s16, s2
; GFX6-NEXT: s_cmp_lt_u32 s16, s18
; GFX6-NEXT: s_cselect_b32 s16, s16, s18
; GFX6-NEXT: s_min_u32 s16, s16, s18
; GFX6-NEXT: s_add_i32 s2, s2, s16
; GFX6-NEXT: s_not_b32 s16, s3
; GFX6-NEXT: s_cmp_lt_u32 s16, s19
; GFX6-NEXT: s_cselect_b32 s16, s16, s19
; GFX6-NEXT: s_min_u32 s16, s16, s19
; GFX6-NEXT: s_add_i32 s3, s3, s16
; GFX6-NEXT: s_not_b32 s16, s4
; GFX6-NEXT: s_cmp_lt_u32 s16, s20
; GFX6-NEXT: s_cselect_b32 s16, s16, s20
; GFX6-NEXT: s_min_u32 s16, s16, s20
; GFX6-NEXT: s_add_i32 s4, s4, s16
; GFX6-NEXT: s_not_b32 s16, s5
; GFX6-NEXT: s_cmp_lt_u32 s16, s21
; GFX6-NEXT: s_cselect_b32 s16, s16, s21
; GFX6-NEXT: s_min_u32 s16, s16, s21
; GFX6-NEXT: s_add_i32 s5, s5, s16
; GFX6-NEXT: s_not_b32 s16, s6
; GFX6-NEXT: s_cmp_lt_u32 s16, s22
; GFX6-NEXT: s_cselect_b32 s16, s16, s22
; GFX6-NEXT: s_min_u32 s16, s16, s22
; GFX6-NEXT: s_add_i32 s6, s6, s16
; GFX6-NEXT: s_not_b32 s16, s7
; GFX6-NEXT: s_cmp_lt_u32 s16, s23
; GFX6-NEXT: s_cselect_b32 s16, s16, s23
; GFX6-NEXT: s_min_u32 s16, s16, s23
; GFX6-NEXT: s_add_i32 s7, s7, s16
; GFX6-NEXT: s_not_b32 s16, s8
; GFX6-NEXT: s_cmp_lt_u32 s16, s24
; GFX6-NEXT: s_cselect_b32 s16, s16, s24
; GFX6-NEXT: s_min_u32 s16, s16, s24
; GFX6-NEXT: s_add_i32 s8, s8, s16
; GFX6-NEXT: s_not_b32 s16, s9
; GFX6-NEXT: s_cmp_lt_u32 s16, s25
; GFX6-NEXT: s_cselect_b32 s16, s16, s25
; GFX6-NEXT: s_min_u32 s16, s16, s25
; GFX6-NEXT: s_add_i32 s9, s9, s16
; GFX6-NEXT: s_not_b32 s16, s10
; GFX6-NEXT: s_cmp_lt_u32 s16, s26
; GFX6-NEXT: s_cselect_b32 s16, s16, s26
; GFX6-NEXT: s_min_u32 s16, s16, s26
; GFX6-NEXT: s_add_i32 s10, s10, s16
; GFX6-NEXT: s_not_b32 s16, s11
; GFX6-NEXT: s_cmp_lt_u32 s16, s27
; GFX6-NEXT: s_cselect_b32 s16, s16, s27
; GFX6-NEXT: s_min_u32 s16, s16, s27
; GFX6-NEXT: s_add_i32 s11, s11, s16
; GFX6-NEXT: s_not_b32 s16, s12
; GFX6-NEXT: s_cmp_lt_u32 s16, s28
; GFX6-NEXT: s_cselect_b32 s16, s16, s28
; GFX6-NEXT: s_min_u32 s16, s16, s28
; GFX6-NEXT: s_add_i32 s12, s12, s16
; GFX6-NEXT: s_not_b32 s16, s13
; GFX6-NEXT: s_cmp_lt_u32 s16, s29
; GFX6-NEXT: s_cselect_b32 s16, s16, s29
; GFX6-NEXT: s_min_u32 s16, s16, s29
; GFX6-NEXT: s_add_i32 s13, s13, s16
; GFX6-NEXT: s_not_b32 s16, s14
; GFX6-NEXT: s_cmp_lt_u32 s16, s30
; GFX6-NEXT: s_cselect_b32 s16, s16, s30
; GFX6-NEXT: s_min_u32 s16, s16, s30
; GFX6-NEXT: s_add_i32 s14, s14, s16
; GFX6-NEXT: s_not_b32 s16, s15
; GFX6-NEXT: s_cmp_lt_u32 s16, s31
; GFX6-NEXT: s_cselect_b32 s16, s16, s31
; GFX6-NEXT: s_min_u32 s16, s16, s31
; GFX6-NEXT: s_add_i32 s15, s15, s16
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1696,8 +1656,7 @@ define amdgpu_ps i16 @s_uaddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_not_b32 s2, s0
; GFX6-NEXT: s_cmp_lt_u32 s2, s1
; GFX6-NEXT: s_cselect_b32 s1, s2, s1
; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: ; return to shader part epilog
@@ -1835,17 +1794,15 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_not_b32 s4, s0
; GFX6-NEXT: s_cmp_lt_u32 s4, s2
; GFX6-NEXT: s_cselect_b32 s2, s4, s2
; GFX6-NEXT: s_add_i32 s0, s0, s2
; GFX6-NEXT: s_min_u32 s2, s4, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s3, s1
; GFX6-NEXT: s_cmp_lt_u32 s3, s2
; GFX6-NEXT: s_cselect_b32 s2, s3, s2
; GFX6-NEXT: s_min_u32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -2053,33 +2010,29 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_not_b32 s8, s0
; GFX6-NEXT: s_cmp_lt_u32 s8, s4
; GFX6-NEXT: s_cselect_b32 s4, s8, s4
; GFX6-NEXT: s_add_i32 s0, s0, s4
; GFX6-NEXT: s_min_u32 s4, s8, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s5, s1
; GFX6-NEXT: s_cmp_lt_u32 s5, s4
; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s1, s1, s4
; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s1, s1, s4
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
; GFX6-NEXT: s_not_b32 s5, s2
; GFX6-NEXT: s_cmp_lt_u32 s5, s4
; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s2, s2, s4
; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_add_i32 s2, s2, s4
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
; GFX6-NEXT: s_not_b32 s5, s3
; GFX6-NEXT: s_cmp_lt_u32 s5, s4
; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -2234,49 +2187,43 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_not_b32 s12, s0
; GFX6-NEXT: s_cmp_lt_u32 s12, s6
; GFX6-NEXT: s_cselect_b32 s6, s12, s6
; GFX6-NEXT: s_add_i32 s0, s0, s6
; GFX6-NEXT: s_min_u32 s6, s12, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s7, s1
; GFX6-NEXT: s_cmp_lt_u32 s7, s6
; GFX6-NEXT: s_cselect_b32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s1, s1, s6
; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
; GFX6-NEXT: s_not_b32 s7, s2
; GFX6-NEXT: s_cmp_lt_u32 s7, s6
; GFX6-NEXT: s_cselect_b32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s2, s2, s6
; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_add_i32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
; GFX6-NEXT: s_not_b32 s7, s3
; GFX6-NEXT: s_cmp_lt_u32 s7, s6
; GFX6-NEXT: s_cselect_b32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s3, s3, s6
; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_add_i32 s3, s3, s6
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
; GFX6-NEXT: s_not_b32 s7, s4
; GFX6-NEXT: s_cmp_lt_u32 s7, s6
; GFX6-NEXT: s_cselect_b32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s4, s4, s6
; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_add_i32 s4, s4, s6
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
; GFX6-NEXT: s_not_b32 s7, s5
; GFX6-NEXT: s_cmp_lt_u32 s7, s6
; GFX6-NEXT: s_cselect_b32 s6, s7, s6
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s5, s5, s6
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
@@ -2454,65 +2401,57 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_not_b32 s16, s0
; GFX6-NEXT: s_cmp_lt_u32 s16, s8
; GFX6-NEXT: s_cselect_b32 s8, s16, s8
; GFX6-NEXT: s_add_i32 s0, s0, s8
; GFX6-NEXT: s_min_u32 s8, s16, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s9, s1
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s1, s1, s8
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
; GFX6-NEXT: s_not_b32 s9, s2
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s2, s2, s8
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_add_i32 s2, s2, s8
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
; GFX6-NEXT: s_not_b32 s9, s3
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s3, s3, s8
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_add_i32 s3, s3, s8
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
; GFX6-NEXT: s_not_b32 s9, s4
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s4, s4, s8
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_add_i32 s4, s4, s8
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
; GFX6-NEXT: s_not_b32 s9, s5
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s5, s5, s8
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_add_i32 s5, s5, s8
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
; GFX6-NEXT: s_not_b32 s9, s6
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s6, s6, s8
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
; GFX6-NEXT: s_lshr_b32 s6, s6, 16
; GFX6-NEXT: s_add_i32 s6, s6, s8
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
; GFX6-NEXT: s_not_b32 s9, s7
; GFX6-NEXT: s_cmp_lt_u32 s9, s8
; GFX6-NEXT: s_cselect_b32 s8, s9, s8
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s7, s7, s8
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_lshr_b32 s6, s6, 16
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
@@ -51,8 +51,7 @@ define amdgpu_ps i7 @s_usubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 25
; GFX6-NEXT: ; return to shader part epilog
@@ -139,8 +138,7 @@ define amdgpu_ps i8 @s_usubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: ; return to shader part epilog
@@ -265,16 +263,14 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -508,28 +504,24 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
; GFX6-NEXT: s_cmp_lt_u32 s2, s3
; GFX6-NEXT: s_cselect_b32 s3, s2, s3
; GFX6-NEXT: s_min_u32 s3, s2, s3
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s4, 24
; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s4, s3, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_min_u32 s4, s3, s4
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
@@ -718,8 +710,7 @@ define amdgpu_ps i24 @s_usubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 8
; GFX6-NEXT: ; return to shader part epilog
@@ -789,8 +780,7 @@ define i32 @v_usubsat_i32(i32 %lhs, i32 %rhs) {
define amdgpu_ps i32 @s_usubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_usubsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
@@ -907,11 +897,9 @@ define <2 x i32> @v_usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
define amdgpu_ps <2 x i32> @s_usubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_cmp_lt_u32 s0, s2
; GFX6-NEXT: s_cselect_b32 s2, s0, s2
; GFX6-NEXT: s_min_u32 s2, s0, s2
; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_cmp_lt_u32 s1, s3
; GFX6-NEXT: s_cselect_b32 s2, s1, s3
; GFX6-NEXT: s_min_u32 s2, s1, s3
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
@@ -989,14 +977,11 @@ define <3 x i32> @v_usubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
define amdgpu_ps <3 x i32> @s_usubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_cmp_lt_u32 s0, s3
; GFX6-NEXT: s_cselect_b32 s3, s0, s3
; GFX6-NEXT: s_min_u32 s3, s0, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s3
; GFX6-NEXT: s_cmp_lt_u32 s1, s4
; GFX6-NEXT: s_cselect_b32 s3, s1, s4
; GFX6-NEXT: s_min_u32 s3, s1, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s3
; GFX6-NEXT: s_cmp_lt_u32 s2, s5
; GFX6-NEXT: s_cselect_b32 s3, s2, s5
; GFX6-NEXT: s_min_u32 s3, s2, s5
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1087,17 +1072,13 @@ define <4 x i32> @v_usubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
define amdgpu_ps <4 x i32> @s_usubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_cmp_lt_u32 s0, s4
; GFX6-NEXT: s_cselect_b32 s4, s0, s4
; GFX6-NEXT: s_min_u32 s4, s0, s4
; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_cmp_lt_u32 s1, s5
; GFX6-NEXT: s_cselect_b32 s4, s1, s5
; GFX6-NEXT: s_min_u32 s4, s1, s5
; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_cmp_lt_u32 s2, s6
; GFX6-NEXT: s_cselect_b32 s4, s2, s6
; GFX6-NEXT: s_min_u32 s4, s2, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s4
; GFX6-NEXT: s_cmp_lt_u32 s3, s7
; GFX6-NEXT: s_cselect_b32 s4, s3, s7
; GFX6-NEXT: s_min_u32 s4, s3, s7
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1201,20 +1182,15 @@ define <5 x i32> @v_usubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
define amdgpu_ps <5 x i32> @s_usubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_cmp_lt_u32 s0, s5
; GFX6-NEXT: s_cselect_b32 s5, s0, s5
; GFX6-NEXT: s_min_u32 s5, s0, s5
; GFX6-NEXT: s_sub_i32 s0, s0, s5
; GFX6-NEXT: s_cmp_lt_u32 s1, s6
; GFX6-NEXT: s_cselect_b32 s5, s1, s6
; GFX6-NEXT: s_min_u32 s5, s1, s6
; GFX6-NEXT: s_sub_i32 s1, s1, s5
; GFX6-NEXT: s_cmp_lt_u32 s2, s7
; GFX6-NEXT: s_cselect_b32 s5, s2, s7
; GFX6-NEXT: s_min_u32 s5, s2, s7
; GFX6-NEXT: s_sub_i32 s2, s2, s5
; GFX6-NEXT: s_cmp_lt_u32 s3, s8
; GFX6-NEXT: s_cselect_b32 s5, s3, s8
; GFX6-NEXT: s_min_u32 s5, s3, s8
; GFX6-NEXT: s_sub_i32 s3, s3, s5
; GFX6-NEXT: s_cmp_lt_u32 s4, s9
; GFX6-NEXT: s_cselect_b32 s5, s4, s9
; GFX6-NEXT: s_min_u32 s5, s4, s9
; GFX6-NEXT: s_sub_i32 s4, s4, s5
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1381,53 +1357,37 @@ define <16 x i32> @v_usubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
define amdgpu_ps <16 x i32> @s_usubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_cmp_lt_u32 s0, s16
; GFX6-NEXT: s_cselect_b32 s16, s0, s16
; GFX6-NEXT: s_min_u32 s16, s0, s16
; GFX6-NEXT: s_sub_i32 s0, s0, s16
; GFX6-NEXT: s_cmp_lt_u32 s1, s17
; GFX6-NEXT: s_cselect_b32 s16, s1, s17
; GFX6-NEXT: s_min_u32 s16, s1, s17
; GFX6-NEXT: s_sub_i32 s1, s1, s16
; GFX6-NEXT: s_cmp_lt_u32 s2, s18
; GFX6-NEXT: s_cselect_b32 s16, s2, s18
; GFX6-NEXT: s_min_u32 s16, s2, s18
; GFX6-NEXT: s_sub_i32 s2, s2, s16
; GFX6-NEXT: s_cmp_lt_u32 s3, s19
; GFX6-NEXT: s_cselect_b32 s16, s3, s19
; GFX6-NEXT: s_min_u32 s16, s3, s19
; GFX6-NEXT: s_sub_i32 s3, s3, s16
; GFX6-NEXT: s_cmp_lt_u32 s4, s20
; GFX6-NEXT: s_cselect_b32 s16, s4, s20
; GFX6-NEXT: s_min_u32 s16, s4, s20
; GFX6-NEXT: s_sub_i32 s4, s4, s16
; GFX6-NEXT: s_cmp_lt_u32 s5, s21
; GFX6-NEXT: s_cselect_b32 s16, s5, s21
; GFX6-NEXT: s_min_u32 s16, s5, s21
; GFX6-NEXT: s_sub_i32 s5, s5, s16
; GFX6-NEXT: s_cmp_lt_u32 s6, s22
; GFX6-NEXT: s_cselect_b32 s16, s6, s22
; GFX6-NEXT: s_min_u32 s16, s6, s22
; GFX6-NEXT: s_sub_i32 s6, s6, s16
; GFX6-NEXT: s_cmp_lt_u32 s7, s23
; GFX6-NEXT: s_cselect_b32 s16, s7, s23
; GFX6-NEXT: s_min_u32 s16, s7, s23
; GFX6-NEXT: s_sub_i32 s7, s7, s16
; GFX6-NEXT: s_cmp_lt_u32 s8, s24
; GFX6-NEXT: s_cselect_b32 s16, s8, s24
; GFX6-NEXT: s_min_u32 s16, s8, s24
; GFX6-NEXT: s_sub_i32 s8, s8, s16
; GFX6-NEXT: s_cmp_lt_u32 s9, s25
; GFX6-NEXT: s_cselect_b32 s16, s9, s25
; GFX6-NEXT: s_min_u32 s16, s9, s25
; GFX6-NEXT: s_sub_i32 s9, s9, s16
; GFX6-NEXT: s_cmp_lt_u32 s10, s26
; GFX6-NEXT: s_cselect_b32 s16, s10, s26
; GFX6-NEXT: s_min_u32 s16, s10, s26
; GFX6-NEXT: s_sub_i32 s10, s10, s16
; GFX6-NEXT: s_cmp_lt_u32 s11, s27
; GFX6-NEXT: s_cselect_b32 s16, s11, s27
; GFX6-NEXT: s_min_u32 s16, s11, s27
; GFX6-NEXT: s_sub_i32 s11, s11, s16
; GFX6-NEXT: s_cmp_lt_u32 s12, s28
; GFX6-NEXT: s_cselect_b32 s16, s12, s28
; GFX6-NEXT: s_min_u32 s16, s12, s28
; GFX6-NEXT: s_sub_i32 s12, s12, s16
; GFX6-NEXT: s_cmp_lt_u32 s13, s29
; GFX6-NEXT: s_cselect_b32 s16, s13, s29
; GFX6-NEXT: s_min_u32 s16, s13, s29
; GFX6-NEXT: s_sub_i32 s13, s13, s16
; GFX6-NEXT: s_cmp_lt_u32 s14, s30
; GFX6-NEXT: s_cselect_b32 s16, s14, s30
; GFX6-NEXT: s_min_u32 s16, s14, s30
; GFX6-NEXT: s_sub_i32 s14, s14, s16
; GFX6-NEXT: s_cmp_lt_u32 s15, s31
; GFX6-NEXT: s_cselect_b32 s16, s15, s31
; GFX6-NEXT: s_min_u32 s16, s15, s31
; GFX6-NEXT: s_sub_i32 s15, s15, s16
; GFX6-NEXT: ; return to shader part epilog
;
@ -1612,8 +1572,7 @@ define amdgpu_ps i16 @s_usubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_cmp_lt_u32 s0, s1
; GFX6-NEXT: s_cselect_b32 s1, s0, s1
; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: ; return to shader part epilog
@ -1746,16 +1705,14 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_cmp_lt_u32 s0, s2
; GFX6-NEXT: s_cselect_b32 s2, s0, s2
; GFX6-NEXT: s_min_u32 s2, s0, s2
; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -1954,30 +1911,26 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_cmp_lt_u32 s0, s4
; GFX6-NEXT: s_cselect_b32 s4, s0, s4
; GFX6-NEXT: s_min_u32 s4, s0, s4
; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
; GFX6-NEXT: s_cmp_lt_u32 s1, s4
; GFX6-NEXT: s_cselect_b32 s4, s1, s4
; GFX6-NEXT: s_min_u32 s4, s1, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
; GFX6-NEXT: s_cmp_lt_u32 s2, s4
; GFX6-NEXT: s_cselect_b32 s4, s2, s4
; GFX6-NEXT: s_min_u32 s4, s2, s4
; GFX6-NEXT: s_sub_i32 s2, s2, s4
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s4, s3, s4
; GFX6-NEXT: s_min_u32 s4, s3, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
@ -2125,44 +2078,38 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_cmp_lt_u32 s0, s6
; GFX6-NEXT: s_cselect_b32 s6, s0, s6
; GFX6-NEXT: s_min_u32 s6, s0, s6
; GFX6-NEXT: s_sub_i32 s0, s0, s6
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
; GFX6-NEXT: s_cmp_lt_u32 s1, s6
; GFX6-NEXT: s_cselect_b32 s6, s1, s6
; GFX6-NEXT: s_min_u32 s6, s1, s6
; GFX6-NEXT: s_sub_i32 s1, s1, s6
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
; GFX6-NEXT: s_cmp_lt_u32 s2, s6
; GFX6-NEXT: s_cselect_b32 s6, s2, s6
; GFX6-NEXT: s_min_u32 s6, s2, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s6
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
; GFX6-NEXT: s_cmp_lt_u32 s3, s6
; GFX6-NEXT: s_cselect_b32 s6, s3, s6
; GFX6-NEXT: s_min_u32 s6, s3, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s6
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
; GFX6-NEXT: s_cmp_lt_u32 s4, s6
; GFX6-NEXT: s_cselect_b32 s6, s4, s6
; GFX6-NEXT: s_min_u32 s6, s4, s6
; GFX6-NEXT: s_sub_i32 s4, s4, s6
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
; GFX6-NEXT: s_cmp_lt_u32 s5, s6
; GFX6-NEXT: s_cselect_b32 s6, s5, s6
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_min_u32 s6, s5, s6
; GFX6-NEXT: s_sub_i32 s5, s5, s6
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
@ -2331,58 +2278,50 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_cmp_lt_u32 s0, s8
; GFX6-NEXT: s_cselect_b32 s8, s0, s8
; GFX6-NEXT: s_min_u32 s8, s0, s8
; GFX6-NEXT: s_sub_i32 s0, s0, s8
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
; GFX6-NEXT: s_cmp_lt_u32 s1, s8
; GFX6-NEXT: s_cselect_b32 s8, s1, s8
; GFX6-NEXT: s_min_u32 s8, s1, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s8
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
; GFX6-NEXT: s_cmp_lt_u32 s2, s8
; GFX6-NEXT: s_cselect_b32 s8, s2, s8
; GFX6-NEXT: s_min_u32 s8, s2, s8
; GFX6-NEXT: s_sub_i32 s2, s2, s8
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
; GFX6-NEXT: s_cmp_lt_u32 s3, s8
; GFX6-NEXT: s_cselect_b32 s8, s3, s8
; GFX6-NEXT: s_min_u32 s8, s3, s8
; GFX6-NEXT: s_sub_i32 s3, s3, s8
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
; GFX6-NEXT: s_cmp_lt_u32 s4, s8
; GFX6-NEXT: s_cselect_b32 s8, s4, s8
; GFX6-NEXT: s_min_u32 s8, s4, s8
; GFX6-NEXT: s_sub_i32 s4, s4, s8
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
; GFX6-NEXT: s_cmp_lt_u32 s5, s8
; GFX6-NEXT: s_cselect_b32 s8, s5, s8
; GFX6-NEXT: s_min_u32 s8, s5, s8
; GFX6-NEXT: s_sub_i32 s5, s5, s8
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
; GFX6-NEXT: s_cmp_lt_u32 s6, s8
; GFX6-NEXT: s_cselect_b32 s8, s6, s8
; GFX6-NEXT: s_min_u32 s8, s6, s8
; GFX6-NEXT: s_sub_i32 s6, s6, s8
; GFX6-NEXT: s_lshr_b32 s6, s6, 16
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
; GFX6-NEXT: s_cmp_lt_u32 s7, s8
; GFX6-NEXT: s_cselect_b32 s8, s7, s8
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_min_u32 s8, s7, s8
; GFX6-NEXT: s_sub_i32 s7, s7, s8
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_lshr_b32 s6, s6, 16
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog