mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[GlobalISel] Fold xor(cmp(pred, _, _), 1) -> cmp(inverse(pred), _, _)
This is needed for an upcoming change to how we translate conditional branches which might generate these. Differential Revision: https://reviews.llvm.org/D86383
This commit is contained in:
parent
6aff3ef558
commit
399486642d
@ -355,6 +355,10 @@ public:
|
||||
/// \return true if \p MI is a G_SEXT_INREG that can be erased.
|
||||
bool matchRedundantSExtInReg(MachineInstr &MI);
|
||||
|
||||
/// Combine inverting a result of a compare into the opposite cond code.
|
||||
bool matchNotCmp(MachineInstr &MI, Register &CmpReg);
|
||||
bool applyNotCmp(MachineInstr &MI, Register &CmpReg);
|
||||
|
||||
/// Try to transform \p MI by using all of the above
|
||||
/// combine functions. Returns true if changed.
|
||||
bool tryCombine(MachineInstr &MI);
|
||||
|
@ -233,6 +233,12 @@ m_GAnd(const LHS &L, const RHS &R) {
|
||||
return BinaryOp_match<LHS, RHS, TargetOpcode::G_AND, true>(L, R);
|
||||
}
|
||||
|
||||
template <typename LHS, typename RHS>
|
||||
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>
|
||||
m_GXor(const LHS &L, const RHS &R) {
|
||||
return BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>(L, R);
|
||||
}
|
||||
|
||||
template <typename LHS, typename RHS>
|
||||
inline BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true> m_GOr(const LHS &L,
|
||||
const RHS &R) {
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/CodeGen/Register.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/Support/Alignment.h"
|
||||
#include "llvm/Support/LowLevelTypeImpl.h"
|
||||
#include "llvm/Support/MachineValueType.h"
|
||||
@ -227,6 +228,10 @@ LLT getGCDType(LLT OrigTy, LLT TargetTy);
|
||||
/// If \p MI is not a splat, returns None.
|
||||
Optional<int> getSplatIndex(MachineInstr &MI);
|
||||
|
||||
/// Returns a scalar constant of a G_BUILD_VECTOR splat if it exists.
|
||||
Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// Return true if the specified instruction is a G_BUILD_VECTOR or
|
||||
/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
|
||||
bool isBuildVectorAllZeros(const MachineInstr &MI,
|
||||
@ -237,5 +242,9 @@ bool isBuildVectorAllZeros(const MachineInstr &MI,
|
||||
bool isBuildVectorAllOnes(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// Returns true if given the TargetLowering's boolean contents information,
|
||||
/// the value \p Val contains a true value.
|
||||
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||
bool IsFP);
|
||||
} // End namespace llvm.
|
||||
#endif
|
||||
|
@ -373,6 +373,14 @@ def ext_ext_fold: GICombineRule <
|
||||
(apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
// Fold xor(cmp(pred, _, _), 1) -> cmp(inverse(pred), _, _).
// The rule is rooted at a G_XOR. Its match data is a single Register that is
// handed to both Helper.matchNotCmp and Helper.applyNotCmp.
def not_cmp_fold_matchinfo : GIDefMatchData<"Register">;
|
||||
def not_cmp_fold : GICombineRule<
|
||||
(defs root:$d, not_cmp_fold_matchinfo:$info),
|
||||
(match (wip_match_opcode G_XOR): $d,
|
||||
[{ return Helper.matchNotCmp(*${d}, ${info}); }]),
|
||||
(apply [{ return Helper.applyNotCmp(*${d}, ${info}); }])
|
||||
>;
|
||||
|
||||
// FIXME: These should use the custom predicate feature once it lands.
|
||||
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
|
||||
undef_to_negative_one,
|
||||
@ -400,4 +408,5 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
|
||||
hoist_logic_op_with_same_opcode_hands,
|
||||
shl_ashr_to_sext_inreg, sext_inreg_of_load,
|
||||
width_reduction_combines, select_combines,
|
||||
known_bits_simplifications, ext_ext_fold]>;
|
||||
known_bits_simplifications, ext_ext_fold,
|
||||
not_cmp_fold]>;
|
||||
|
@ -2231,6 +2231,74 @@ bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
|
||||
return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
|
||||
}
|
||||
|
||||
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
|
||||
int64_t Cst, bool IsVector, bool IsFP) {
|
||||
// For i1, Cst will always be -1 regardless of boolean contents.
|
||||
return (ScalarSizeBits == 1 && Cst == -1) ||
|
||||
isConstTrueVal(TLI, Cst, IsVector, IsFP);
|
||||
}
|
||||
|
||||
/// Match \p MI (which must be a G_XOR) against xor(cmp, true).
///
/// The match succeeds when:
///  - one xor operand is the result of a G_ICMP or G_FCMP and that result has
///    exactly one non-debug use, and
///  - the other operand is an integer constant (scalar case) or a
///    build-vector splat of one integer constant (vector case) that counts
///    as "true" according to isConstValidTrue.
///
/// On success \p CmpReg is set to the compare's result register and true is
/// returned. On failure false is returned and \p CmpReg is left untouched.
bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) {
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  const Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();

  // Split the xor into the candidate compare and the candidate "true".
  Register XorSrc;
  Register CstReg;
  if (!mi_match(DstReg, MRI, m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
    return false;

  // applyNotCmp rewrites the compare's predicate in place, so the xor has to
  // be the only (non-debug) user of the compare result.
  if (!MRI.hasOneNonDBGUse(XorSrc))
    return false;

  // The source must be an integer compare or, failing that, an FP compare.
  const bool IsICmp =
      mi_match(XorSrc, MRI, m_GICmp(m_Pred(), m_Reg(), m_Reg()));
  const bool IsFP = !IsICmp;
  if (IsFP && !mi_match(XorSrc, MRI, m_GFCmp(m_Pred(), m_Reg(), m_Reg())))
    return false;

  // Fetch the constant being xor'ed in: a splat value for vectors, a plain
  // integer constant for scalars.
  const bool IsVector = Ty.isVector();
  int64_t TrueCandidate;
  unsigned ScalarBits;
  if (IsVector) {
    MachineInstr *SplatDef = MRI.getVRegDef(CstReg);
    auto SplatCst = getBuildVectorConstantSplat(*SplatDef, MRI);
    if (!SplatCst)
      return false;
    TrueCandidate = *SplatCst;
    ScalarBits = Ty.getScalarSizeInBits();
  } else {
    if (!mi_match(CstReg, MRI, m_ICst(TrueCandidate)))
      return false;
    ScalarBits = Ty.getSizeInBits();
  }

  if (!isConstValidTrue(TLI, ScalarBits, TrueCandidate, IsVector, IsFP))
    return false;

  CmpReg = XorSrc;
  return true;
}
|
||||
|
||||
/// Apply the fold found by matchNotCmp: invert the predicate of the compare
/// that defines \p CmpReg, redirect every use of \p MI's result to the
/// compare's result, and erase \p MI. Always returns true.
bool CombinerHelper::applyNotCmp(MachineInstr &MI, Register &CmpReg) {
  MachineInstr *Cmp = MRI.getVRegDef(CmpReg);
  assert(Cmp && "Should have been given an MI reg");
  assert(Cmp->getOpcode() == TargetOpcode::G_ICMP ||
         Cmp->getOpcode() == TargetOpcode::G_FCMP);

  // Flip the predicate in place, bracketing the mutation with observer
  // notifications.
  Observer.changingInstr(*Cmp);
  MachineOperand &PredicateOp = Cmp->getOperand(1);
  const auto OldPred =
      static_cast<CmpInst::Predicate>(PredicateOp.getPredicate());
  const CmpInst::Predicate InvertedPred = CmpInst::getInversePredicate(OldPred);
  PredicateOp.setPredicate(InvertedPred);
  Observer.changedInstr(*Cmp);

  // The xor is now redundant: forward its users to the compare and drop it.
  const Register XorDst = MI.getOperand(0).getReg();
  const Register CmpDst = Cmp->getOperand(0).getReg();
  replaceRegWith(MRI, XorDst, CmpDst);
  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
bool CombinerHelper::tryCombine(MachineInstr &MI) {
|
||||
if (tryCombineCopy(MI))
|
||||
return true;
|
||||
|
@ -11,6 +11,8 @@
|
||||
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/ADT/APFloat.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
||||
@ -694,6 +696,28 @@ static bool isBuildVectorConstantSplat(const MachineInstr &MI,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// If \p MI is a build-vector operation (per isBuildVectorOp) whose source
/// operands are all the same integer constant, return that constant.
/// Returns None when \p MI is not a build-vector op, when any element fails
/// to match an integer constant, or when two elements differ.
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  if (!isBuildVectorOp(MI.getOpcode()))
    return None;

  Optional<int64_t> SplatValue;
  // Operand 0 is skipped; the scan covers operands 1..N-1.
  for (unsigned OpIdx = 1, E = MI.getNumOperands(); OpIdx != E; ++OpIdx) {
    int64_t EltCst;
    if (!mi_match(MI.getOperand(OpIdx).getReg(), MRI, m_ICst(EltCst)))
      return None;

    // Every element after the first must agree with the value seen so far.
    if (SplatValue && *SplatValue != EltCst)
      return None;
    SplatValue = EltCst;
  }

  return SplatValue;
}
|
||||
|
||||
bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
return isBuildVectorConstantSplat(MI, MRI, 0);
|
||||
@ -703,3 +727,16 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
return isBuildVectorConstantSplat(MI, MRI, -1);
|
||||
}
|
||||
|
||||
bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||
bool IsFP) {
|
||||
switch (TLI.getBooleanContents(IsVector, IsFP)) {
|
||||
case TargetLowering::UndefinedBooleanContent:
|
||||
return Val & 0x1;
|
||||
case TargetLowering::ZeroOrOneBooleanContent:
|
||||
return Val == 1;
|
||||
case TargetLowering::ZeroOrNegativeOneBooleanContent:
|
||||
return Val == -1;
|
||||
}
|
||||
llvm_unreachable("Invalid boolean contents");
|
||||
}
|
||||
|
@ -0,0 +1,163 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="not_cmp_fold" %s -o - -verify-machineinstrs | FileCheck %s
|
||||
|
||||
# Check that we fold the inversion of a compare result into the compare itself by inverting its condition code.
|
||||
---
|
||||
name: icmp
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: icmp
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]]
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
|
||||
; CHECK: $w0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:_(s64) = COPY $x0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(s1) = G_CONSTANT i1 1
|
||||
%3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1
|
||||
%4:_(s1) = G_XOR %3, %2
|
||||
%5:_(s32) = G_ANYEXT %4
|
||||
$w0 = COPY %5(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: fcmp
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: fcmp
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ule), [[COPY]](s64), [[C]]
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
|
||||
; CHECK: $w0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:_(s64) = COPY $x0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(s1) = G_CONSTANT i1 1
|
||||
%3:_(s1) = G_FCMP floatpred(ogt), %0(s64), %1
|
||||
%4:_(s1) = G_XOR %3, %2
|
||||
%5:_(s32) = G_ANYEXT %4
|
||||
$w0 = COPY %5(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: icmp_not_xor_with_1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: icmp_not_xor_with_1
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C1]]
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
|
||||
; CHECK: $w0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:_(s64) = COPY $x0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(s1) = G_CONSTANT i1 0
|
||||
%3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1
|
||||
%4:_(s1) = G_XOR %3, %2
|
||||
%5:_(s32) = G_ANYEXT %4
|
||||
$w0 = COPY %5(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: icmp_not_xor_with_wrong_bool_contents
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; Even though bit 0 of the constant is 1, we require zero in the upper bits
|
||||
; for AArch64's zero-or-one boolean contents.
|
||||
; CHECK-LABEL: name: icmp_not_xor_with_wrong_bool_contents
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ICMP]], [[C1]]
|
||||
; CHECK: $w0 = COPY [[XOR]](s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:_(s64) = COPY $x0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(s32) = G_CONSTANT i32 7
|
||||
%3:_(s32) = G_ICMP intpred(sgt), %0(s64), %1
|
||||
%4:_(s32) = G_XOR %3, %2
|
||||
$w0 = COPY %4(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: icmp_multiple_use
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: icmp_multiple_use
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C1]]
|
||||
; CHECK: %other_use:_(s1) = G_AND [[ICMP]], [[C1]]
|
||||
; CHECK: %other_use_ext:_(s32) = G_ANYEXT %other_use(s1)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
|
||||
; CHECK: $w0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $w1 = COPY %other_use_ext(s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:_(s64) = COPY $x0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(s1) = G_CONSTANT i1 1
|
||||
%3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1
|
||||
%4:_(s1) = G_XOR %3, %2
|
||||
%other_use:_(s1) = G_AND %3, %2
|
||||
%other_use_ext:_(s32) = G_ANYEXT %other_use(s1)
|
||||
%5:_(s32) = G_ANYEXT %4
|
||||
$w0 = COPY %5(s32)
|
||||
$w1 = COPY %other_use_ext
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: icmp_vector
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: icmp_vector
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
|
||||
; CHECK: %splat_op2:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s1>) = G_ICMP intpred(sle), [[COPY]](<4 x s32>), %splat_op2
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[ICMP]](<4 x s1>)
|
||||
; CHECK: $q0 = COPY [[ANYEXT]](<4 x s32>)
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%0:_(<4 x s32>) = COPY $q0
|
||||
%1:_(s32) = G_CONSTANT i32 5
|
||||
%splat_op2:_(<4 x s32>) = G_BUILD_VECTOR %1, %1, %1, %1
|
||||
%2:_(s1) = G_CONSTANT i1 1
|
||||
%splat_true:_(<4 x s1>) = G_BUILD_VECTOR %2, %2, %2, %2
|
||||
%3:_(<4 x s1>) = G_ICMP intpred(sgt), %0(<4 x s32>), %splat_op2
|
||||
%4:_(<4 x s1>) = G_XOR %3, %splat_true
|
||||
%5:_(<4 x s32>) = G_ANYEXT %4
|
||||
$q0 = COPY %5(<4 x s32>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
@ -136,27 +136,24 @@ define void @constrained_if_register_class() {
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; CHECK-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; CHECK-NEXT: s_xor_b32 s4, s4, -1
|
||||
; CHECK-NEXT: s_and_b32 s4, s4, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB4_6
|
||||
; CHECK-NEXT: s_cbranch_scc1 BB4_6
|
||||
; CHECK-NEXT: ; %bb.1: ; %bb2
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s5
|
||||
; CHECK-NEXT: flat_load_dword v0, v[0:1]
|
||||
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||||
; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+4
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: s_mov_b32 s4, -1
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s7
|
||||
; CHECK-NEXT: flat_load_dword v0, v[0:1]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
|
||||
; CHECK-NEXT: s_xor_b64 s[8:9], vcc, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
|
||||
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, 1.0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; CHECK-NEXT: ; %bb.2: ; %bb7
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: ; %bb.3: ; %bb8
|
||||
@ -217,10 +214,8 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
|
||||
; CHECK-NEXT: ; %bb.2: ; %bb4
|
||||
; CHECK-NEXT: ; in Loop: Header=BB5_1 Depth=1
|
||||
; CHECK-NEXT: global_load_dword v2, v[0:1], off
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, 1
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, v0, v2
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], vcc, s[2:3]
|
||||
; CHECK-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, v2
|
||||
; CHECK-NEXT: BB5_3: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB5_1 Depth=1
|
||||
; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3]
|
||||
|
@ -10,12 +10,10 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_or_b32_e32 v5, v1, v3
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
@ -204,10 +202,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: s_mov_b32 s1, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[0:1]
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 vcc, s[6:7], s[8:9]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
|
||||
@ -358,11 +353,14 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, 0
|
||||
; CHECK-NEXT: BB1_2: ; %Flow
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_and_b32 s0, s1, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
|
||||
; CHECK-NEXT: s_sub_i32 s0, 0, s4
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
|
||||
@ -382,7 +380,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: BB1_4:
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
@ -695,12 +693,10 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v8, v0
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v9, v5
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
||||
@ -874,12 +870,10 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v7
|
||||
@ -2513,12 +2507,10 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
||||
; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: v_or_b32_e32 v3, v1, v5
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v5
|
||||
@ -3002,13 +2994,11 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v5, v0
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v7, v11
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v11
|
||||
@ -3182,12 +3172,10 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v9
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v9
|
||||
|
@ -10,12 +10,10 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_or_b32_e32 v5, v1, v3
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
@ -200,10 +198,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: s_mov_b32 s1, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[0:1]
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 vcc, s[6:7], s[8:9]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_ashr_i32 s0, s5, 31
|
||||
@ -352,11 +347,14 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, 0
|
||||
; CHECK-NEXT: BB1_2: ; %Flow
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_and_b32 s0, s1, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
|
||||
; CHECK-NEXT: s_sub_i32 s0, 0, s4
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
|
||||
@ -374,7 +372,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: BB1_4:
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
@ -683,12 +681,10 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v8, v0
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v9, v5
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
||||
@ -858,12 +854,10 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v7
|
||||
@ -2477,12 +2471,10 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
||||
; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: v_or_b32_e32 v3, v1, v5
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v5
|
||||
@ -2958,13 +2950,11 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v5, v0
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v7, v11
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v11
|
||||
@ -3134,12 +3124,10 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v9
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v9
|
||||
|
@ -10,12 +10,10 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_or_b32_e32 v5, v1, v3
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
|
||||
@ -189,10 +187,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: s_mov_b32 s5, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 vcc, s[6:7], s[8:9]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
@ -324,11 +319,14 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: s_mov_b32 s5, 0
|
||||
; CHECK-NEXT: BB1_2: ; %Flow
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_and_b32 s1, s5, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
; CHECK-NEXT: s_sub_i32 s1, 0, s2
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
|
||||
@ -348,7 +346,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: BB1_4:
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
@ -631,12 +629,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v9, v1
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v9, v5
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
@ -795,12 +791,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6
|
||||
@ -2292,15 +2286,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_movk_i32 s4, 0x1000
|
||||
; CHECK-NEXT: s_mov_b32 s5, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v6, 0
|
||||
; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2
|
||||
; CHECK-NEXT: v_or_b32_e32 v7, v1, v5
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v3, v1, v5
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4
|
||||
@ -2736,16 +2728,14 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v7, v1
|
||||
; CGP-NEXT: s_movk_i32 s4, 0x1000
|
||||
; CGP-NEXT: s_mov_b32 s5, 0
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4
|
||||
; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v7, v11
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
|
||||
@ -2904,12 +2894,10 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v9
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8
|
||||
|
@ -10,12 +10,10 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_or_b32_e32 v5, v1, v3
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2
|
||||
@ -186,10 +184,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: s_mov_b32 s5, -1
|
||||
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 vcc, s[6:7], s[8:9]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
|
||||
; CHECK-NEXT: s_cbranch_vccz BB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
@ -320,11 +315,14 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; CHECK-NEXT: s_mov_b32 s5, 0
|
||||
; CHECK-NEXT: BB1_2: ; %Flow
|
||||
; CHECK-NEXT: s_branch BB1_3
|
||||
; CHECK-NEXT: BB1_2:
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CHECK-NEXT: BB1_3: ; %Flow
|
||||
; CHECK-NEXT: s_and_b32 s1, s5, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB1_5
|
||||
; CHECK-NEXT: ; %bb.4:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
|
||||
; CHECK-NEXT: s_sub_i32 s1, 0, s2
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
|
||||
@ -342,7 +340,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; CHECK-NEXT: BB1_4:
|
||||
; CHECK-NEXT: BB1_5:
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
@ -623,12 +621,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v9, v1
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v9, v5
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
|
||||
@ -784,12 +780,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v7
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB2_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6
|
||||
@ -2258,15 +2252,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_movk_i32 s4, 0x1000
|
||||
; CHECK-NEXT: s_mov_b32 s5, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v6, 0
|
||||
; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2
|
||||
; CHECK-NEXT: v_or_b32_e32 v7, v1, v5
|
||||
; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CHECK-NEXT: v_or_b32_e32 v3, v1, v5
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
|
||||
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_execz BB7_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4
|
||||
@ -2697,16 +2689,14 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: v_mov_b32_e32 v7, v1
|
||||
; CGP-NEXT: s_movk_i32 s4, 0x1000
|
||||
; CGP-NEXT: s_mov_b32 s5, 0
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4
|
||||
; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6
|
||||
; CGP-NEXT: v_or_b32_e32 v1, v7, v11
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_2
|
||||
; CGP-NEXT: ; %bb.1:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10
|
||||
@ -2862,12 +2852,10 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
|
||||
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CGP-NEXT: v_or_b32_e32 v5, v3, v9
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0
|
||||
; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1
|
||||
; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5]
|
||||
; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
|
||||
; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
||||
; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
|
||||
; CGP-NEXT: s_cbranch_execz BB8_6
|
||||
; CGP-NEXT: ; %bb.5:
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8
|
||||
|
Loading…
Reference in New Issue
Block a user