mirror of https://github.com/RPCS3/llvm-mirror.git
[GlobalISel] Rewrite the elide-br-by-swapping-icmp-ops combine to do less.
This combine previously tried to take sequences like:

  %cond = G_ICMP pred, a, b
  G_BRCOND %cond, %truebb
  G_BR %falsebb
%truebb:
  ...
%falsebb:
  ...

and, by inverting the compare predicate and swapping branch targets, delete the G_BR and instead have a single conditional branch to the falsebb. Since an earlier patch added a combine to fold not(icmp) into just an inverted icmp, we don't need this combine to do as much. This patch instead generalizes the combine by just looking for:

  G_BRCOND %cond, %truebb
  G_BR %falsebb
%truebb:
  ...
%falsebb:
  ...

and then inverting the condition using a not (xor). The xor can be folded away in a separate combine. This change also lets us avoid some optimization code in the IRTranslator.

I also think that deleting G_BRs in the combiner is unnecessary: that's something that targets can decide to do at selection time, and leaving it to them could simplify generic code in future.

Differential Revision: https://reviews.llvm.org/D86664
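As an illustration (not part of the commit message; virtual register and block numbers are hypothetical, chosen to mirror the updated MIR test below), the new combine turns:

  bb.0:
    %c:_(s1) = G_ICMP intpred(sgt), %x, %y
    G_BRCOND %c(s1), %bb.1        ; %bb.1 is the layout successor
    G_BR %bb.2

into:

  bb.0:
    %c:_(s1) = G_ICMP intpred(sgt), %x, %y
    %true:_(s1) = G_CONSTANT i1 true
    %inv:_(s1) = G_XOR %c, %true
    G_BRCOND %inv(s1), %bb.2
    G_BR %bb.1                    ; now just branches to the fallthrough block

so the conditional branch goes to the far block and the G_BR to the fallthrough, without the combine having to understand the compare at all.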
parent dc79f6327a
commit a7636dc8f8
@@ -147,9 +147,10 @@ public:
   bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
   bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
 
-  bool matchElideBrByInvertingCond(MachineInstr &MI);
-  void applyElideBrByInvertingCond(MachineInstr &MI);
-  bool tryElideBrByInvertingCond(MachineInstr &MI);
+  /// If a brcond's true block is not the fallthrough, make it so by inverting
+  /// the condition and swapping operands.
+  bool matchOptBrCondByInvertingCond(MachineInstr &MI);
+  void applyOptBrCondByInvertingCond(MachineInstr &MI);
 
   /// If \p MI is G_CONCAT_VECTORS, try to combine it.
   /// Returns true if MI changed.
@@ -245,5 +245,9 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
 /// the value \p Val contains a true value.
 bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
                     bool IsFP);
+
+/// Returns an integer representing true, as defined by the
+/// TargetBooleanContents.
+int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
 } // End namespace llvm.
 #endif
@@ -145,13 +145,11 @@ def combine_indexed_load_store : GICombineRule<
          [{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>;
 
-// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of
-// all_combines because it wasn't there.
-def elide_br_by_inverting_cond : GICombineRule<
+def opt_brcond_by_inverting_cond : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_BR):$root,
-         [{ return Helper.matchElideBrByInvertingCond(*${root}); }]),
-  (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>;
+         [{ return Helper.matchOptBrCondByInvertingCond(*${root}); }]),
+  (apply [{ Helper.applyOptBrCondByInvertingCond(*${root}); }])>;
 
 def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">;
 def ptr_add_immed_chain : GICombineRule<
@@ -416,4 +414,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold]>;
+    not_cmp_fold, opt_brcond_by_inverting_cond]>;
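Because opt_brcond_by_inverting_cond now sits in all_combines alongside not_cmp_fold, the G_XOR it introduces is normally cleaned up in the same combiner run. A sketch of that interaction (illustrative register names, assuming the xor'd condition has a single use):

  ; after opt_brcond_by_inverting_cond:
  %c:_(s1) = G_ICMP intpred(sgt), %x, %y
  %true:_(s1) = G_CONSTANT i1 true
  %inv:_(s1) = G_XOR %c, %true
  G_BRCOND %inv(s1), %bb.2

  ; after not_cmp_fold:
  %inv:_(s1) = G_ICMP intpred(sle), %x, %y
  G_BRCOND %inv(s1), %bb.2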
@@ -881,14 +881,12 @@ void CombinerHelper::applyCombineIndexedLoadStore(
   LLVM_DEBUG(dbgs() << "  Combinined to indexed operation");
 }
 
-bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::G_BR)
     return false;
 
   // Try to match the following:
   // bb1:
-  //   %c(s32) = G_ICMP pred, %a, %b
-  //   %c1(s1) = G_TRUNC %c(s32)
   //   G_BRCOND %c1, %bb2
   //   G_BR %bb3
   // bb2:
@@ -898,7 +896,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
   // The above pattern does not have a fall through to the successor bb2, always
   // resulting in a branch no matter which path is taken. Here we try to find
   // and replace that pattern with conditional branch to bb3 and otherwise
-  // fallthrough to bb2.
+  // fallthrough to bb2. This is generally better for branch predictors.
 
   MachineBasicBlock *MBB = MI.getParent();
   MachineBasicBlock::iterator BrIt(MI);
@@ -913,40 +911,34 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
   // Check that the next block is the conditional branch target.
   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
     return false;
-
-  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
-  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
-      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
-    return false;
   return true;
 }
 
-bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
-  if (!matchElideBrByInvertingCond(MI))
-    return false;
-  applyElideBrByInvertingCond(MI);
-  return true;
-}
-
-void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
   MachineBasicBlock::iterator BrIt(MI);
   MachineInstr *BrCond = &*std::prev(BrIt);
-  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
 
-  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
-      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+  Builder.setInstrAndDebugLoc(*BrCond);
+  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+  // FIXME: Does int/fp matter for this? If so, we might need to restrict
+  // this to i1 only since we might not know for sure what kind of
+  // compare generated the condition value.
+  auto True = Builder.buildConstant(
+      Ty, getICmpTrueVal(getTargetLowering(), false, false));
+  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
 
-  // Invert the G_ICMP condition.
-  Observer.changingInstr(*CmpMI);
-  CmpMI->getOperand(1).setPredicate(InversePred);
-  Observer.changedInstr(*CmpMI);
+  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+  Observer.changingInstr(MI);
+  MI.getOperand(0).setMBB(FallthroughBB);
+  Observer.changedInstr(MI);
 
-  // Change the conditional branch target.
+  // Change the conditional branch to use the inverted condition and
+  // new target block.
   Observer.changingInstr(*BrCond);
+  BrCond->getOperand(0).setReg(Xor.getReg(0));
   BrCond->getOperand(1).setMBB(BrTarget);
   Observer.changedInstr(*BrCond);
-  MI.eraseFromParent();
 }
 
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -740,3 +740,15 @@ bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
   }
   llvm_unreachable("Invalid boolean contents");
 }
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+                             bool IsFP) {
+  switch (TLI.getBooleanContents(IsVector, IsFP)) {
+  case TargetLowering::UndefinedBooleanContent:
+  case TargetLowering::ZeroOrOneBooleanContent:
+    return 1;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return -1;
+  }
+  llvm_unreachable("Invalid boolean contents");
+}
@@ -19,7 +19,6 @@ def fconstant_to_constant : GICombineRule<
 
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
-                                           elide_br_by_inverting_cond,
                                            fconstant_to_constant]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
@@ -42,8 +42,7 @@ def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
 
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
-  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
-                                          elide_br_by_inverting_cond]> {
+  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
 }
 
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -global-isel -O0 -o - %s | FileCheck %s
-
-%struct.comp = type { i8*, i32, i8*, [3 x i8], i32 }
-
-define void @regbranch() {
-; CHECK-LABEL: regbranch:
-; CHECK: mov {{w[0-9]+}}, #0
-cond_next240.i:
-  br i1 false, label %cond_true251.i, label %cond_next272.i
-
-cond_true251.i:
-  switch i8 0, label %cond_next272.i [
-    i8 42, label %bb268.i
-    i8 43, label %bb268.i
-    i8 63, label %bb268.i
-  ]
-
-bb268.i:
-  br label %cond_next272.i
-
-cond_next272.i:
-  %len.2.i = phi i32 [ 0, %bb268.i ], [ 0, %cond_next240.i ], [ 0, %cond_true251.i ]
-  %tmp278.i = icmp eq i32 %len.2.i, 1
-  ret void
-}
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="opt_brcond_by_inverting_cond" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
   target triple = "arm64-apple-ios5.0.0"
@@ -38,8 +38,11 @@ body: |
 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), [[C]]
-; CHECK: G_BRCOND [[ICMP]](s1), %bb.2
+; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
+; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
+; CHECK: G_BRCOND [[XOR]](s1), %bb.2
+; CHECK: G_BR %bb.1
 ; CHECK: bb.1.if.then:
 ; CHECK: successors: %bb.3(0x80000000)
 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]]
@@ -8,6 +8,8 @@
   define i16 @const_s16() { ret i16 42 }
   define i32 @const_s32() { ret i32 42 }
   define i64 @const_s64() { ret i64 1234567890123 }
+  define i32 @const_s32_zero() { ret i32 0 }
+  define i64 @const_s64_zero() { ret i64 0 }
   define i8* @const_p0_0() { ret i8* null }
 
   define i32 @fconst_s32() { ret i32 42 }
@@ -81,6 +83,38 @@ body: |
     $x0 = COPY %0(s64)
 ...
 
+---
+name: const_s32_zero
+legalized: true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: const_s32_zero
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK: $w0 = COPY [[COPY]]
+    %0(s32) = G_CONSTANT i32 0
+    $w0 = COPY %0(s32)
+...
+
+---
+name: const_s64_zero
+legalized: true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: const_s64_zero
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $xzr
+    ; CHECK: $x0 = COPY [[COPY]]
+    %0(s64) = G_CONSTANT i64 0
+    $x0 = COPY %0(s64)
+...
+
 ---
 name: const_p0_0
 legalized: true
@@ -52,9 +52,10 @@ define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_xor_b32 s0, s0, -1
 ; GCN-NEXT: s_and_b32 s0, s0, 1
 ; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cbranch_scc0 BB3_2
+; GCN-NEXT: s_cbranch_scc1 BB3_2
 ; GCN-NEXT: ; %bb.1: ; %bb0
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: flat_store_dword v[0:1], v0
@@ -80,9 +81,10 @@ define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: s_and_b32 s0, s0, s1
+; GCN-NEXT: s_xor_b32 s0, s0, -1
 ; GCN-NEXT: s_and_b32 s0, s0, 1
 ; GCN-NEXT: s_cmp_lg_u32 s0, 0
-; GCN-NEXT: s_cbranch_scc0 BB4_2
+; GCN-NEXT: s_cbranch_scc1 BB4_2
 ; GCN-NEXT: ; %bb.1: ; %bb0
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: flat_store_dword v[0:1], v0
@@ -51,11 +51,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
 ; CI-NEXT: s_waitcnt lgkmcnt(0)
 ; CI-NEXT: s_load_dword s0, s[4:5], 0x11
 ; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_cmp_eq_u32 s1, s0
+; CI-NEXT: s_cmp_lg_u32 s1, s0
 ; CI-NEXT: s_cselect_b32 s0, 1, 0
 ; CI-NEXT: s_and_b32 s0, s0, 1
 ; CI-NEXT: s_cmp_lg_u32 s0, 0
-; CI-NEXT: s_cbranch_scc0 BB1_2
+; CI-NEXT: s_cbranch_scc1 BB1_2
 ; CI-NEXT: ; %bb.1: ; %bb0
 ; CI-NEXT: v_mov_b32_e32 v0, 0
 ; CI-NEXT: flat_store_dword v[0:1], v0
@@ -68,11 +68,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9-NEXT: s_cmp_lg_u32 s1, s0
 ; GFX9-NEXT: s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT: s_and_b32 s0, s0, 1
 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB1_2
+; GFX9-NEXT: s_cbranch_scc1 BB1_2
 ; GFX9-NEXT: ; %bb.1: ; %bb0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
@@ -51,11 +51,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
 ; CI-NEXT: s_waitcnt lgkmcnt(0)
 ; CI-NEXT: s_load_dword s0, s[4:5], 0x10
 ; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_cmp_eq_u32 s1, s0
+; CI-NEXT: s_cmp_lg_u32 s1, s0
 ; CI-NEXT: s_cselect_b32 s0, 1, 0
 ; CI-NEXT: s_and_b32 s0, s0, 1
 ; CI-NEXT: s_cmp_lg_u32 s0, 0
-; CI-NEXT: s_cbranch_scc0 BB1_2
+; CI-NEXT: s_cbranch_scc1 BB1_2
 ; CI-NEXT: ; %bb.1: ; %bb0
 ; CI-NEXT: v_mov_b32_e32 v0, 0
 ; CI-NEXT: flat_store_dword v[0:1], v0
@@ -68,11 +68,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
-; GFX9-NEXT: s_cmp_eq_u32 s1, s0
+; GFX9-NEXT: s_cmp_lg_u32 s1, s0
 ; GFX9-NEXT: s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT: s_and_b32 s0, s0, 1
 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB1_2
+; GFX9-NEXT: s_cbranch_scc1 BB1_2
 ; GFX9-NEXT: ; %bb.1: ; %bb0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
@@ -29,9 +29,10 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
 ; GFX9-NEXT: s_mov_b32 s0, 0
 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
 ; GFX9-NEXT: BB0_2: ; %Flow
+; GFX9-NEXT: s_xor_b32 s0, s0, -1
 ; GFX9-NEXT: s_and_b32 s0, s0, 1
 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB0_4
+; GFX9-NEXT: s_cbranch_scc1 BB0_4
 ; GFX9-NEXT: ; %bb.3: ; %bb0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
@@ -109,9 +110,10 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
 ; GFX9-NEXT: v_mov_b32_e32 v1, s5
 ; GFX9-NEXT: global_store_dword v[0:1], v2, off
 ; GFX9-NEXT: BB1_2: ; %Flow
+; GFX9-NEXT: s_xor_b32 s0, s0, -1
 ; GFX9-NEXT: s_and_b32 s0, s0, 1
 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0
-; GFX9-NEXT: s_cbranch_scc0 BB1_4
+; GFX9-NEXT: s_cbranch_scc1 BB1_4
 ; GFX9-NEXT: ; %bb.3: ; %bb0
 ; GFX9-NEXT: s_getpc_b64 s[0:1]
 ; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
@@ -357,9 +357,10 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT: BB1_2:
 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s0, s1, 1
+; CHECK-NEXT: s_xor_b32 s0, s1, -1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
 ; CHECK-NEXT: ; %bb.4:
 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
 ; CHECK-NEXT: s_sub_i32 s0, 0, s4
@@ -351,9 +351,10 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT: BB1_2:
 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s0, s1, 1
+; CHECK-NEXT: s_xor_b32 s0, s1, -1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
 ; CHECK-NEXT: ; %bb.4:
 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
 ; CHECK-NEXT: s_sub_i32 s0, 0, s4
@@ -323,9 +323,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT: BB1_2:
 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s1, s5, 1
+; CHECK-NEXT: s_xor_b32 s1, s5, -1
+; CHECK-NEXT: s_and_b32 s1, s1, 1
 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
 ; CHECK-NEXT: ; %bb.4:
 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
 ; CHECK-NEXT: s_sub_i32 s1, 0, s2
@@ -319,9 +319,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT: BB1_2:
 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT: BB1_3: ; %Flow
-; CHECK-NEXT: s_and_b32 s1, s5, 1
+; CHECK-NEXT: s_xor_b32 s1, s5, -1
+; CHECK-NEXT: s_and_b32 s1, s1, 1
 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
-; CHECK-NEXT: s_cbranch_scc0 BB1_5
+; CHECK-NEXT: s_cbranch_scc1 BB1_5
 ; CHECK-NEXT: ; %bb.4:
 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
 ; CHECK-NEXT: s_sub_i32 s1, 0, s2