mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
Revert [MBP] Disable aggressive loop rotate in plain mode
This reverts r369664 (git commit 51f48295cbe8fa3a44db263b528dd9f7bae7bf9a). It causes many benchmark regressions, both internally and in LLVM's benchmark suite. llvm-svn: 370398
This commit is contained in:
parent
fcd4894cf8
commit
650c315835
@ -462,20 +462,17 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
||||
const MachineBasicBlock *ExitBB,
|
||||
const BlockFilterSet &LoopBlockSet);
|
||||
MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
|
||||
const MachineLoop &L,
|
||||
const BlockFilterSet &LoopBlockSet,
|
||||
bool HasStaticProfileOnly = false);
|
||||
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
|
||||
MachineBasicBlock *findBestLoopTop(
|
||||
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
|
||||
MachineBasicBlock *findBestLoopTopNoProfile(
|
||||
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
|
||||
MachineBasicBlock *findBestLoopExit(
|
||||
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
|
||||
const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
|
||||
BlockFrequency &ExitFreq);
|
||||
BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
|
||||
void buildLoopChains(const MachineLoop &L);
|
||||
void rotateLoop(
|
||||
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
|
||||
const BlockFilterSet &LoopBlockSet);
|
||||
BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
|
||||
void rotateLoopWithProfile(
|
||||
BlockChain &LoopChain, const MachineLoop &L,
|
||||
const BlockFilterSet &LoopBlockSet);
|
||||
@ -1950,14 +1947,11 @@ MachineBlockPlacement::FallThroughGains(
|
||||
/// At the same time, moving it before the old top increases the taken branch
|
||||
/// to loop exit block, so the reduced taken branch will be compared with
|
||||
/// the increased taken branch to the loop exit block.
|
||||
///
|
||||
/// This pattern is enabled only when HasStaticProfileOnly is false.
|
||||
MachineBasicBlock *
|
||||
MachineBlockPlacement::findBestLoopTopHelper(
|
||||
MachineBasicBlock *OldTop,
|
||||
const MachineLoop &L,
|
||||
const BlockFilterSet &LoopBlockSet,
|
||||
bool HasStaticProfileOnly) {
|
||||
const BlockFilterSet &LoopBlockSet) {
|
||||
// Check that the header hasn't been fused with a preheader block due to
|
||||
// crazy branches. If it has, we need to start with the header at the top to
|
||||
// prevent pulling the preheader into the loop body.
|
||||
@ -1981,38 +1975,22 @@ MachineBlockPlacement::findBestLoopTopHelper(
|
||||
if (Pred->succ_size() > 2)
|
||||
continue;
|
||||
|
||||
MachineBasicBlock *OtherBB = nullptr;
|
||||
if (Pred->succ_size() == 2) {
|
||||
OtherBB = *Pred->succ_begin();
|
||||
if (OtherBB == OldTop)
|
||||
OtherBB = *Pred->succ_rbegin();
|
||||
}
|
||||
|
||||
if (!canMoveBottomBlockToTop(Pred, OldTop))
|
||||
continue;
|
||||
|
||||
if (HasStaticProfileOnly) {
|
||||
// In plain mode we consider pattern 1 only.
|
||||
if (Pred->succ_size() > 1)
|
||||
continue;
|
||||
|
||||
BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
|
||||
if (!BestPred || PredFreq > BestGains ||
|
||||
(!(PredFreq < BestGains) &&
|
||||
Pred->isLayoutSuccessor(OldTop))) {
|
||||
BestPred = Pred;
|
||||
BestGains = PredFreq;
|
||||
}
|
||||
} else {
|
||||
// With profile information we also consider pattern 2.
|
||||
MachineBasicBlock *OtherBB = nullptr;
|
||||
if (Pred->succ_size() == 2) {
|
||||
OtherBB = *Pred->succ_begin();
|
||||
if (OtherBB == OldTop)
|
||||
OtherBB = *Pred->succ_rbegin();
|
||||
}
|
||||
|
||||
// And more sophisticated cost model.
|
||||
BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
|
||||
LoopBlockSet);
|
||||
if ((Gains > 0) && (Gains > BestGains ||
|
||||
((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
|
||||
BestPred = Pred;
|
||||
BestGains = Gains;
|
||||
}
|
||||
BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
|
||||
LoopBlockSet);
|
||||
if ((Gains > 0) && (Gains > BestGains ||
|
||||
((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
|
||||
BestPred = Pred;
|
||||
BestGains = Gains;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2032,7 +2010,7 @@ MachineBlockPlacement::findBestLoopTopHelper(
|
||||
return BestPred;
|
||||
}
|
||||
|
||||
/// Find the best loop top block for layout in FDO mode.
|
||||
/// Find the best loop top block for layout.
|
||||
///
|
||||
/// This function iteratively calls findBestLoopTopHelper, until no new better
|
||||
/// BB can be found.
|
||||
@ -2060,34 +2038,6 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
|
||||
return NewTop;
|
||||
}
|
||||
|
||||
/// Find the best loop top block for layout in plain mode. It is less aggressive
|
||||
/// than findBestLoopTop.
|
||||
///
|
||||
/// Look for a block which is strictly better than the loop header for laying
|
||||
/// out at the top of the loop. This looks for one and only one pattern:
|
||||
/// a latch block with no conditional exit. This block will cause a conditional
|
||||
/// jump around it or will be the bottom of the loop if we lay it out in place,
|
||||
/// but if it doesn't end up at the bottom of the loop for any reason,
|
||||
/// rotation alone won't fix it. Because such a block will always result in an
|
||||
/// unconditional jump (for the backedge) rotating it in front of the loop
|
||||
/// header is always profitable.
|
||||
MachineBasicBlock *
|
||||
MachineBlockPlacement::findBestLoopTopNoProfile(
|
||||
const MachineLoop &L,
|
||||
const BlockFilterSet &LoopBlockSet) {
|
||||
// Placing the latch block before the header may introduce an extra branch
|
||||
// that skips this block the first time the loop is executed, which we want
|
||||
// to avoid when optimising for size.
|
||||
// FIXME: in theory there is a case that does not introduce a new branch,
|
||||
// i.e. when the layout predecessor does not fallthrough to the loop header.
|
||||
// In practice this never happens though: there always seems to be a preheader
|
||||
// that can fallthrough and that is also placed before the header.
|
||||
if (F->getFunction().hasOptSize())
|
||||
return L.getHeader();
|
||||
|
||||
return findBestLoopTopHelper(L.getHeader(), L, LoopBlockSet, true);
|
||||
}
|
||||
|
||||
/// Find the best loop exiting block for layout.
|
||||
///
|
||||
/// This routine implements the logic to analyze the loop looking for the best
|
||||
@ -2095,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTopNoProfile(
|
||||
/// fallthrough opportunities.
|
||||
MachineBasicBlock *
|
||||
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
|
||||
const BlockFilterSet &LoopBlockSet) {
|
||||
const BlockFilterSet &LoopBlockSet,
|
||||
BlockFrequency &ExitFreq) {
|
||||
// We don't want to layout the loop linearly in all cases. If the loop header
|
||||
// is just a normal basic block in the loop, we want to look for what block
|
||||
// within the loop is the best one to layout at the top. However, if the loop
|
||||
@ -2206,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
|
||||
|
||||
LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
|
||||
<< "\n");
|
||||
ExitFreq = BestExitEdgeFreq;
|
||||
return ExitingBB;
|
||||
}
|
||||
|
||||
@ -2250,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallthrough(
|
||||
/// of its bottom already, don't rotate it.
|
||||
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
|
||||
const MachineBasicBlock *ExitingBB,
|
||||
BlockFrequency ExitFreq,
|
||||
const BlockFilterSet &LoopBlockSet) {
|
||||
if (!ExitingBB)
|
||||
return;
|
||||
@ -2273,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
|
||||
(!SuccChain || Succ == *SuccChain->begin()))
|
||||
return;
|
||||
}
|
||||
|
||||
// Rotation will destroy the top fallthrough, so we need to ensure the new exit
|
||||
// frequency is larger than top fallthrough.
|
||||
BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
|
||||
if (FallThrough2Top >= ExitFreq)
|
||||
return;
|
||||
}
|
||||
|
||||
BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
|
||||
@ -2524,10 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
|
||||
// loop. This will default to the header, but may end up as one of the
|
||||
// predecessors to the header if there is one which will result in strictly
|
||||
// fewer branches in the loop body.
|
||||
MachineBasicBlock *LoopTop =
|
||||
(RotateLoopWithProfile || F->getFunction().hasProfileData()) ?
|
||||
findBestLoopTop(L, LoopBlockSet) :
|
||||
findBestLoopTopNoProfile(L, LoopBlockSet);
|
||||
MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
|
||||
|
||||
// If we selected just the header for the loop top, look for a potentially
|
||||
// profitable exit block in the event that rotating the loop can eliminate
|
||||
@ -2536,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
|
||||
// Loops are processed innermost to outermost, make sure we clear
|
||||
// PreferredLoopExit before processing a new loop.
|
||||
PreferredLoopExit = nullptr;
|
||||
BlockFrequency ExitFreq;
|
||||
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
|
||||
PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
|
||||
PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
|
||||
|
||||
BlockChain &LoopChain = *BlockToChain[LoopTop];
|
||||
|
||||
@ -2554,11 +2511,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
|
||||
|
||||
buildChain(LoopTop, LoopChain, &LoopBlockSet);
|
||||
|
||||
if (RotateLoopWithProfile) {
|
||||
if (LoopTop == L.getHeader())
|
||||
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
|
||||
} else
|
||||
rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
|
||||
if (RotateLoopWithProfile)
|
||||
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
|
||||
else
|
||||
rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
|
||||
|
||||
LLVM_DEBUG({
|
||||
// Crash at the end so we get all of the debugging output first.
|
||||
|
@ -111,7 +111,7 @@ define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) {
|
||||
; CHECK: mov w22, #2
|
||||
; CHECK-NOT: mov w22, #4
|
||||
; CHECK-NOT: cmn w22, #4
|
||||
; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]]
|
||||
; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond
|
||||
; CHECK-NOT: b.ne [[LOOP2]]
|
||||
; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}}
|
||||
; CHECK: bl _foo
|
||||
|
@ -1,9 +1,8 @@
|
||||
; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: LBB0_7:
|
||||
; CHECK: b.hi
|
||||
; CHECK-NEXT: b
|
||||
; CHECK-LABEL: %cond.false12.i
|
||||
; CHECK: b.gt
|
||||
; CHECK-NEXT: LBB0_8:
|
||||
; CHECK-NEXT: mov x8, x9
|
||||
; CHECK-NEXT: LBB0_9:
|
||||
|
@ -230,6 +230,11 @@ bb.end: ; preds = %bb.then, %bb
|
||||
; Make sure scc liveness is updated if sor_b64 is removed
|
||||
; ALL-LABEL: {{^}}scc_liveness:
|
||||
|
||||
; GCN: %bb10
|
||||
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: s_andn2_b64
|
||||
; GCN-NEXT: s_cbranch_execz
|
||||
|
||||
; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: s_andn2_b64 exec, exec,
|
||||
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
|
||||
@ -239,10 +244,6 @@ bb.end: ; preds = %bb.then, %bb
|
||||
|
||||
; GCN-NOT: s_or_b64 exec, exec
|
||||
|
||||
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: s_andn2_b64
|
||||
; GCN-NEXT: s_cbranch_execnz
|
||||
|
||||
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_store_dword
|
||||
|
@ -20,29 +20,13 @@ define amdgpu_ps void @main(i32, float) {
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
|
||||
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
|
||||
; CHECK-NEXT: BB0_1: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
|
||||
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec
|
||||
; CHECK-NEXT: s_cbranch_vccz BB0_5
|
||||
; CHECK-NEXT: ; %bb.2: ; %endif1
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], -1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
|
||||
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
|
||||
; CHECK-NEXT: ; mask branch BB0_4
|
||||
; CHECK-NEXT: BB0_3: ; %endif2
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
|
||||
; CHECK-NEXT: BB0_4: ; %Flow1
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: s_branch BB0_3
|
||||
; CHECK-NEXT: BB0_1: ; %Flow1
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
||||
; CHECK-NEXT: BB0_5: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: BB0_2: ; %Flow
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
|
||||
; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5]
|
||||
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
||||
@ -50,8 +34,27 @@ define amdgpu_ps void @main(i32, float) {
|
||||
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11]
|
||||
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
||||
; CHECK-NEXT: s_cbranch_execnz BB0_1
|
||||
; CHECK-NEXT: ; %bb.6: ; %Flow2
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_6
|
||||
; CHECK-NEXT: BB0_3: ; %loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
|
||||
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
|
||||
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec
|
||||
; CHECK-NEXT: s_cbranch_vccz BB0_2
|
||||
; CHECK-NEXT: ; %bb.4: ; %endif1
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], -1
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
|
||||
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
|
||||
; CHECK-NEXT: ; mask branch BB0_1
|
||||
; CHECK-NEXT: s_cbranch_execz BB0_1
|
||||
; CHECK-NEXT: BB0_5: ; %endif2
|
||||
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
|
||||
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
|
||||
; CHECK-NEXT: s_branch BB0_1
|
||||
; CHECK-NEXT: BB0_6: ; %Flow2
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
|
||||
@ -62,6 +65,7 @@ define amdgpu_ps void @main(i32, float) {
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
|
||||
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; this is the divergent branch with the condition not marked as divergent
|
||||
start:
|
||||
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
|
||||
br label %loop
|
||||
|
@ -1,16 +1,5 @@
|
||||
; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: %bb11
|
||||
|
||||
; Load from %arg in a Loop body has alias store
|
||||
|
||||
; CHECK: flat_load_dword
|
||||
|
||||
; CHECK-LABEL: %bb20
|
||||
; CHECK: flat_store_dword
|
||||
|
||||
; #####################################################################
|
||||
|
||||
; CHECK-LABEL: %bb22
|
||||
|
||||
; Load from %arg has alias store in Loop
|
||||
@ -23,6 +12,18 @@
|
||||
|
||||
; CHECK: s_load_dword
|
||||
|
||||
; #####################################################################
|
||||
|
||||
; CHECK-LABEL: %bb11
|
||||
|
||||
; Load from %arg in a Loop body has alias store
|
||||
|
||||
; CHECK: flat_load_dword
|
||||
|
||||
; CHECK-LABEL: %bb20
|
||||
|
||||
; CHECK: flat_store_dword
|
||||
|
||||
define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
|
||||
bb:
|
||||
%tmp = sext i32 %arg2 to i64
|
||||
|
@ -3,20 +3,20 @@
|
||||
|
||||
; SI-LABEL: {{^}}i1_copy_from_loop:
|
||||
;
|
||||
; SI: ; %Flow
|
||||
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
|
||||
; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
|
||||
|
||||
; SI: ; %for.body
|
||||
; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
|
||||
; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
|
||||
; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec
|
||||
; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
|
||||
; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
|
||||
|
||||
; SI: ; %Flow1
|
||||
; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec
|
||||
|
||||
; SI: ; %Flow
|
||||
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
|
||||
; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
|
||||
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
|
||||
|
||||
; SI: ; %for.end
|
||||
; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]]
|
||||
|
||||
|
@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
|
||||
; GCN-LABEL: {{^}}broken_phi_bb:
|
||||
; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8
|
||||
|
||||
; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: {{^BB[0-9]+_[0-9]+}}:
|
||||
; GCN: s_mov_b64 exec,
|
||||
|
||||
; GCN: [[BB2]]:
|
||||
; GCN: [[BB2:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
|
||||
; GCN: buffer_load_dword
|
||||
|
||||
@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
|
||||
; IDXMODE: s_set_gpr_idx_off
|
||||
|
||||
; GCN: s_cbranch_execnz [[REGLOOP]]
|
||||
|
||||
; GCN: {{^; %bb.[0-9]}}:
|
||||
; GCN: s_mov_b64 exec,
|
||||
; GCN: s_branch [[BB2]]
|
||||
|
||||
define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
|
||||
bb:
|
||||
br label %bb2
|
||||
|
@ -61,9 +61,9 @@ loopexit:
|
||||
|
||||
; GCN-LABEL: {{^}}break_cond_is_arg:
|
||||
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
|
||||
; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
|
||||
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
|
||||
; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]],
|
||||
; GCN: s_andn2_b64 exec, exec, [[REG3]]
|
||||
; GCN: s_or_b64 [[REG3]], [[REG2]],
|
||||
|
||||
define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
|
||||
entry:
|
||||
|
@ -24,13 +24,29 @@
|
||||
; GCN: ; %main_body
|
||||
; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
|
||||
; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
|
||||
; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
|
||||
; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
|
||||
; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
|
||||
; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
|
||||
; GCN: ; %Flow
|
||||
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
|
||||
; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[TMP0]]
|
||||
; GCN: s_cbranch_execz [[FLOW2]]
|
||||
|
||||
; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
|
||||
; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec
|
||||
; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec
|
||||
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
|
||||
; GCN: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec
|
||||
; GCN: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec
|
||||
; GCN: s_and_saveexec_b64 [[SAVE_EXEC]], vcc
|
||||
|
||||
; FIXME: duplicate comparison
|
||||
; GCN: ; %ENDIF
|
||||
@ -43,23 +59,7 @@
|
||||
; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]]
|
||||
; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]]
|
||||
|
||||
; GCN: ; %Flow
|
||||
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]]
|
||||
; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]]
|
||||
; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
|
||||
; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[TMP0]]
|
||||
; GCN: s_cbranch_execnz [[INNER_LOOP]]
|
||||
|
||||
; GCN: ; %Flow2
|
||||
; GCN: s_or_b64 exec, exec, [[TMP0]]
|
||||
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]]
|
||||
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
|
||||
; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
|
||||
; GCN: s_cbranch_execnz [[OUTER_LOOP]]
|
||||
|
||||
; GCN: ; %IF
|
||||
; GCN: [[IF_BLOCK]]: ; %IF
|
||||
; GCN-NEXT: s_endpgm
|
||||
define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
|
||||
main_body:
|
||||
@ -92,12 +92,18 @@ ENDIF: ; preds = %LOOP
|
||||
; GCN-LABEL: {{^}}multi_if_break_loop:
|
||||
; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}}
|
||||
|
||||
; GCN: ; %Flow4
|
||||
; GCN: s_and_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]]
|
||||
; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[LEFT]]
|
||||
; GCN-NEXT: s_cbranch_execz
|
||||
|
||||
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}}
|
||||
; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
|
||||
; GCN: s_mov_b64 [[OLD_LEFT]], [[LEFT]]
|
||||
|
||||
; GCN: ; %LeafBlock1
|
||||
; GCN: s_mov_b64
|
||||
; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; GCN: s_mov_b64 [[BREAK]], -1{{$}}
|
||||
|
||||
; GCN: ; %case1
|
||||
; GCN: buffer_load_dword [[LOAD2:v[0-9]+]],
|
||||
@ -118,12 +124,6 @@ ENDIF: ; preds = %LOOP
|
||||
; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
|
||||
; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]]
|
||||
|
||||
; GCN: ; %Flow4
|
||||
; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]]
|
||||
; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]]
|
||||
; GCN: s_andn2_b64 exec, exec, [[LEFT]]
|
||||
; GCN-NEXT: s_cbranch_execnz
|
||||
|
||||
define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
|
||||
bb:
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -3,11 +3,11 @@
|
||||
; GCN-LABEL: {{^}}negated_cond:
|
||||
; GCN: BB0_1:
|
||||
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
|
||||
; GCN: BB0_2:
|
||||
; GCN: BB0_3:
|
||||
; GCN-NOT: v_cndmask_b32
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: s_andn2_b64 vcc, exec, [[CC]]
|
||||
; GCN: s_cbranch_vccnz BB0_4
|
||||
; GCN: s_cbranch_vccnz BB0_2
|
||||
define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
|
||||
bb:
|
||||
br label %bb1
|
||||
@ -36,11 +36,11 @@ bb4:
|
||||
|
||||
; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
|
||||
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
|
||||
; GCN: BB1_1:
|
||||
; GCN: %bb4
|
||||
; GCN-NOT: v_cndmask_b32
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: s_andn2_b64 vcc, exec, [[CC]]
|
||||
; GCN: s_cbranch_vccz BB1_3
|
||||
; GCN: s_cbranch_vccnz BB1_1
|
||||
define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
|
||||
bb:
|
||||
br label %bb2
|
||||
|
@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
; FUNC-LABEL: {{^}}loop_land_info_assert:
|
||||
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
|
||||
; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
|
||||
; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond
|
||||
; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; SI: [[CONVEX_EXIT:BB[0-9_]+]]
|
||||
; SI: s_mov_b64 vcc,
|
||||
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
|
||||
; SI: s_cbranch_vccnz [[INFLOOP]]
|
||||
|
||||
; SI: s_cbranch_vccnz [[WHILELOOP]]
|
||||
|
||||
; SI: ; %if.else
|
||||
; SI: buffer_store_dword
|
||||
|
||||
; SI: [[INFLOOP]]:
|
||||
; SI: s_cbranch_vccnz [[CONVEX_EXIT]]
|
||||
|
||||
; SI: ; %for.cond.preheader
|
||||
; SI: [[FOR_COND_PH]]: ; %for.cond.preheader
|
||||
; SI: s_cbranch_vccz [[ENDPGM]]
|
||||
|
||||
; SI: [[ENDPGM]]:
|
||||
|
@ -166,30 +166,29 @@ endif:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_loop_with_if:
|
||||
; GCN: BB{{.*}}: ; %bb2
|
||||
; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}}
|
||||
; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}}
|
||||
; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}]
|
||||
; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}:
|
||||
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}: ; %bb5
|
||||
; GCN: BB{{.*}}: ; %Flow
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}}
|
||||
; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
|
||||
; GCN: ; mask branch BB
|
||||
; GCN: BB{{.*}}: ; %bb11
|
||||
; GCN: BB{{.*}}: ; %Flow1
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}}
|
||||
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}]
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
||||
; GCN: ; mask branch BB
|
||||
; GCN: BB{{.*}}: ; %bb10
|
||||
; GCN: BB{{.*}}: ; %bb13
|
||||
; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}}
|
||||
; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}}
|
||||
; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}]
|
||||
; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
|
||||
; GCN: s_cbranch_execnz
|
||||
; GCN: ; %bb1
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
@ -231,17 +230,16 @@ bb13:
|
||||
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
|
||||
; GCN: ; mask branch
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}: ; %.preheader
|
||||
; GCN: ; %bb8
|
||||
; GCN: BB{{.*}}:
|
||||
; GCN: BB{{.*}}:
|
||||
; GFX1032: s_andn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, exec_lo
|
||||
; GFX1064: s_andn2_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], exec
|
||||
; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}}
|
||||
; GFX1032: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}]
|
||||
; GFX1064: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
|
||||
; GCN: BB{{.*}}: ; %Flow
|
||||
; GCN: s_cbranch_execnz
|
||||
; GCN: BB{{.*}}: ; %.loopexit
|
||||
; GCN: s_cbranch_execz
|
||||
; GCN: BB{{.*}}:
|
||||
define amdgpu_kernel void @test_loop_with_if_else_break(i32 addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -657,7 +655,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
|
||||
; GCN-LABEL: {{^}}test_loop_vcc:
|
||||
; GFX1032: v_cmp_lt_f32_e32 vcc_lo,
|
||||
; GFX1064: v_cmp_lt_f32_e32 vcc,
|
||||
; GCN: s_cbranch_vccz
|
||||
; GCN: s_cbranch_vccnz
|
||||
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) #0 {
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -650,12 +650,15 @@ main_body:
|
||||
; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
|
||||
; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000
|
||||
|
||||
; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
|
||||
; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
|
||||
; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop
|
||||
; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
|
||||
; CHECK: s_cbranch_vccz [[LOOPHDR]]
|
||||
; CHECK: ; %break
|
||||
; CHECK: s_cbranch_vccnz
|
||||
|
||||
; CHECK: ; %body
|
||||
; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
|
||||
; CHECK: s_branch [[LOOPHDR]]
|
||||
|
||||
; CHECK: ; %break
|
||||
; CHECK: ; return
|
||||
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
|
||||
entry:
|
||||
|
@ -26,7 +26,7 @@ bb1: ; preds = %bb
|
||||
|
||||
bb2: ; preds = %bb1, %entry
|
||||
; CHECK: cmp [[REG]], #0
|
||||
; CHECK: ble
|
||||
; CHECK: bgt
|
||||
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
|
||||
%tries.0 = sub i32 2147483647, %indvar
|
||||
%tmp1 = icmp sgt i32 %tries.0, 0
|
||||
|
@ -47,9 +47,8 @@ tailrecurse.switch: ; preds = %tailrecurse
|
||||
; V8-NEXT: beq
|
||||
; V8-NEXT: %tailrecurse.switch
|
||||
; V8: cmp
|
||||
; V8-NEXT: beq
|
||||
; V8-NEXT: %sw.epilog
|
||||
; V8-NEXT: bx lr
|
||||
; V8-NEXT: bne
|
||||
; V8-NEXT: %sw.bb
|
||||
switch i32 %and, label %sw.epilog [
|
||||
i32 1, label %sw.bb
|
||||
i32 3, label %sw.bb6
|
||||
|
@ -9,8 +9,8 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
|
||||
; ARM: clrex
|
||||
|
||||
; T2-LABEL: t:
|
||||
; T2: strexb
|
||||
; T2: ldrexb
|
||||
; T2: strexb
|
||||
; T2: clrex
|
||||
%tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
|
||||
%tmp1 = extractvalue { i8, i1 } %tmp0, 0
|
||||
|
@ -52,16 +52,16 @@ entry:
|
||||
; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
|
||||
; CHECK-ARMV7-NEXT: .fnstart
|
||||
; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
|
||||
; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
|
||||
; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
|
||||
; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
|
||||
; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
|
||||
; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS:r[0-9]+]], [r0]
|
||||
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
|
||||
; CHECK-ARMV7-NEXT: bne [[EXIT:.LBB[0-9_]+]]
|
||||
; CHECK-ARMV7-NEXT: strexb [[SUCCESS]], r2, [r0]
|
||||
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
|
||||
; CHECK-ARMV7-NEXT: moveq r0, #1
|
||||
; CHECK-ARMV7-NEXT: bxeq lr
|
||||
; CHECK-ARMV7-NEXT: [[TRY]]:
|
||||
; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
|
||||
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
|
||||
; CHECK-ARMV7-NEXT: beq [[HEAD]]
|
||||
; CHECK-ARMV7-NEXT: b [[TRY]]
|
||||
; CHECK-ARMV7-NEXT: [[EXIT]]:
|
||||
; CHECK-ARMV7-NEXT: mov r0, #0
|
||||
; CHECK-ARMV7-NEXT: clrex
|
||||
; CHECK-ARMV7-NEXT: bx lr
|
||||
@ -69,17 +69,17 @@ entry:
|
||||
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
|
||||
; CHECK-THUMBV7-NEXT: .fnstart
|
||||
; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
|
||||
; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
|
||||
; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:
|
||||
; CHECK-THUMBV7-NEXT: [[TRYLD:.LBB[0-9_]+]]
|
||||
; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
|
||||
; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
|
||||
; CHECK-THUMBV7-NEXT: bne [[EXIT:.LBB[0-9_]+]]
|
||||
; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
|
||||
; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0
|
||||
; CHECK-THUMBV7-NEXT: itt eq
|
||||
; CHECK-THUMBV7-NEXT: moveq r0, #1
|
||||
; CHECK-THUMBV7-NEXT: bxeq lr
|
||||
; CHECK-THUMBV7-NEXT: [[TRYLD]]:
|
||||
; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
|
||||
; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
|
||||
; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]]
|
||||
; CHECK-THUMBV7-NEXT: b [[TRYLD]]
|
||||
; CHECK-THUMBV7-NEXT: [[EXIT]]:
|
||||
; CHECK-THUMBV7-NEXT: movs r0, #0
|
||||
; CHECK-THUMBV7-NEXT: clrex
|
||||
; CHECK-THUMBV7-NEXT: bx lr
|
||||
|
@ -38,8 +38,9 @@ entry:
|
||||
br i1 %0, label %bb5, label %bb.nph15
|
||||
|
||||
bb1: ; preds = %bb2.preheader, %bb1
|
||||
; CHECK: LBB1_[[BB3:.]]: @ %bb3
|
||||
; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
|
||||
; CHECK: bmi LBB1_[[BB3:.]]
|
||||
; CHECK: bmi LBB1_[[BB3]]
|
||||
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
|
||||
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
|
||||
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
|
||||
@ -53,7 +54,6 @@ bb1: ; preds = %bb2.preheader, %bb1
|
||||
bb3: ; preds = %bb1, %bb2.preheader
|
||||
; CHECK: LBB1_[[BB1:.]]: @ %bb1
|
||||
; CHECK: bne LBB1_[[BB1]]
|
||||
; CHECK: LBB1_[[BB3]]: @ %bb3
|
||||
%sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
|
||||
%3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
|
||||
%exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]
|
||||
|
@ -4,7 +4,7 @@ target triple = "armv7"
|
||||
; CHECK-LABEL: func:
|
||||
; CHECK: push {r11, lr}
|
||||
; CHECK: vpush {d8}
|
||||
; CHECK: b .LBB0_2
|
||||
; CHECK: .LBB0_1: @ %tailrecurse
|
||||
define arm_aapcscc double @func() {
|
||||
br label %tailrecurse
|
||||
|
||||
|
@ -4,10 +4,10 @@
|
||||
; This situation can arise due to tail duplication.
|
||||
|
||||
; CHECK: loop1([[LP:.LBB0_[0-9]+]]
|
||||
; CHECK: endloop1
|
||||
; CHECK: [[LP]]:
|
||||
; CHECK-NOT: loop1(
|
||||
; CHECK: endloop1
|
||||
; CHECK: endloop1
|
||||
|
||||
%s.0 = type { i32, i8* }
|
||||
%s.1 = type { i32, i32, i32, i32 }
|
||||
|
@ -2,9 +2,11 @@
|
||||
; Make sure that the loop in the end has only one basic block.
|
||||
|
||||
; CHECK-LABEL: fred
|
||||
; CHECK: %b2
|
||||
; Rely on the comments, make sure the one for the loop header is present.
|
||||
; CHECK: %loop
|
||||
; CHECK-NOT: %should_merge
|
||||
; CHECK: %should_merge
|
||||
; CHECK: %exit
|
||||
|
||||
target triple = "hexagon"
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
|
||||
; Rely on the comments generated by llc. Check that "if.then" was not predicated.
|
||||
; CHECK: b5
|
||||
; CHECK: b2
|
||||
; CHECK-NOT: if{{.*}}memd
|
||||
; CHECK: b5
|
||||
|
||||
%s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
|
||||
%s.1 = type { i32, i32 }
|
||||
|
@ -3,9 +3,9 @@
|
||||
|
||||
; CHECK: memub
|
||||
; CHECK: memub
|
||||
; CHECK: cmp.eq
|
||||
; CHECK: memub
|
||||
; CHECK-NOT: if{{.*}}jump .LBB
|
||||
; CHECK: cmp.eq
|
||||
|
||||
target triple = "hexagon-unknown--elf"
|
||||
|
||||
|
@ -401,16 +401,15 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test40:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: b .LBB40_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB40_1:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB40_2:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB40_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB40_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB40_1
|
||||
; PPC64LE-NEXT: .LBB40_3:
|
||||
; PPC64LE-NEXT: stbcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic
|
||||
@ -466,16 +465,15 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB43_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB43_1:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB43_2:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB43_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB43_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB43_1
|
||||
; PPC64LE-NEXT: .LBB43_3:
|
||||
; PPC64LE-NEXT: stbcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release monotonic
|
||||
@ -487,16 +485,15 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB44_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB44_1:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB44_2:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB44_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB44_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB44_1
|
||||
; PPC64LE-NEXT: .LBB44_3:
|
||||
; PPC64LE-NEXT: stbcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire
|
||||
@ -622,16 +619,15 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test50:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: b .LBB50_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB50_1:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB50_2:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB50_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB50_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB50_1
|
||||
; PPC64LE-NEXT: .LBB50_3:
|
||||
; PPC64LE-NEXT: sthcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic
|
||||
@ -687,16 +683,15 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB53_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB53_1:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB53_2:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB53_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB53_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB53_1
|
||||
; PPC64LE-NEXT: .LBB53_3:
|
||||
; PPC64LE-NEXT: sthcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release monotonic
|
||||
@ -708,16 +703,15 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB54_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB54_1:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB54_2:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB54_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB54_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB54_1
|
||||
; PPC64LE-NEXT: .LBB54_3:
|
||||
; PPC64LE-NEXT: sthcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
|
||||
@ -842,16 +836,15 @@ define void @test59(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test60(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
; PPC64LE-LABEL: test60:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: b .LBB60_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB60_1:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB60_2:
|
||||
; PPC64LE-NEXT: lwarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB60_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB60_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB60_1
|
||||
; PPC64LE-NEXT: .LBB60_3:
|
||||
; PPC64LE-NEXT: stwcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic
|
||||
@ -904,16 +897,15 @@ define void @test63(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
; PPC64LE-LABEL: test63:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB63_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB63_1:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB63_2:
|
||||
; PPC64LE-NEXT: lwarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB63_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB63_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB63_1
|
||||
; PPC64LE-NEXT: .LBB63_3:
|
||||
; PPC64LE-NEXT: stwcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release monotonic
|
||||
@ -924,16 +916,15 @@ define void @test64(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
; PPC64LE-LABEL: test64:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB64_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB64_1:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB64_2:
|
||||
; PPC64LE-NEXT: lwarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB64_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB64_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB64_1
|
||||
; PPC64LE-NEXT: .LBB64_3:
|
||||
; PPC64LE-NEXT: stwcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
|
||||
@ -1053,16 +1044,15 @@ define void @test69(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
define void @test70(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
; PPC64LE-LABEL: test70:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: b .LBB70_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB70_1:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB70_2:
|
||||
; PPC64LE-NEXT: ldarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpd 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB70_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB70_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB70_1
|
||||
; PPC64LE-NEXT: .LBB70_3:
|
||||
; PPC64LE-NEXT: stdcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic
|
||||
@ -1115,16 +1105,15 @@ define void @test73(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
; PPC64LE-LABEL: test73:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB73_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB73_1:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB73_2:
|
||||
; PPC64LE-NEXT: ldarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpd 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB73_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB73_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB73_1
|
||||
; PPC64LE-NEXT: .LBB73_3:
|
||||
; PPC64LE-NEXT: stdcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release monotonic
|
||||
@ -1135,16 +1124,15 @@ define void @test74(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
; PPC64LE-LABEL: test74:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB74_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB74_1:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB74_2:
|
||||
; PPC64LE-NEXT: ldarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpd 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB74_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB74_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB74_1
|
||||
; PPC64LE-NEXT: .LBB74_3:
|
||||
; PPC64LE-NEXT: stdcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
|
||||
@ -1265,16 +1253,15 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test80:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: b .LBB80_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB80_1:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB80_2:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB80_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB80_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB80_1
|
||||
; PPC64LE-NEXT: .LBB80_3:
|
||||
; PPC64LE-NEXT: stbcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
|
||||
@ -1330,16 +1317,15 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB83_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB83_1:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB83_2:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB83_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB83_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB83_1
|
||||
; PPC64LE-NEXT: .LBB83_3:
|
||||
; PPC64LE-NEXT: stbcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
|
||||
@ -1351,16 +1337,15 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB84_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB84_1:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB84_2:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB84_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB84_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stbcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB84_1
|
||||
; PPC64LE-NEXT: .LBB84_3:
|
||||
; PPC64LE-NEXT: stbcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
|
||||
@ -1486,16 +1471,15 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test90:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: b .LBB90_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB90_1:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB90_2:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB90_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB90_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b
|
||||
; PPC64LE-NEXT: .LBB90_3:
|
||||
; PPC64LE-NEXT: sthcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
|
||||
@ -1551,16 +1535,15 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB93_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB93_1:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB93_2:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB93_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB93_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB93_1
|
||||
; PPC64LE-NEXT: .LBB93_3:
|
||||
; PPC64LE-NEXT: sthcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
|
||||
@ -1572,16 +1555,15 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB94_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB94_1:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB94_2:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB94_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB94_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: sthcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB94_1
|
||||
; PPC64LE-NEXT: .LBB94_3:
|
||||
; PPC64LE-NEXT: sthcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
|
||||
@ -1706,16 +1688,15 @@ define void @test99(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test100(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
; PPC64LE-LABEL: test100:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: b .LBB100_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB100_1:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB100_2:
|
||||
; PPC64LE-NEXT: lwarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB100_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB100_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB100_1
|
||||
; PPC64LE-NEXT: .LBB100_3:
|
||||
; PPC64LE-NEXT: stwcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
|
||||
@ -1768,16 +1749,15 @@ define void @test103(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
; PPC64LE-LABEL: test103:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB103_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB103_1:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB103_2:
|
||||
; PPC64LE-NEXT: lwarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB103_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB103_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB103_1
|
||||
; PPC64LE-NEXT: .LBB103_3:
|
||||
; PPC64LE-NEXT: stwcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
|
||||
@ -1788,16 +1768,15 @@ define void @test104(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
; PPC64LE-LABEL: test104:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB104_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB104_1:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB104_2:
|
||||
; PPC64LE-NEXT: lwarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB104_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB104_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stwcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB104_1
|
||||
; PPC64LE-NEXT: .LBB104_3:
|
||||
; PPC64LE-NEXT: stwcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
|
||||
@ -1917,16 +1896,15 @@ define void @test109(i32* %ptr, i32 %cmp, i32 %val) {
|
||||
define void @test110(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
; PPC64LE-LABEL: test110:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: b .LBB110_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB110_1:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB110_2:
|
||||
; PPC64LE-NEXT: ldarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpd 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB110_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB110_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB110_1
|
||||
; PPC64LE-NEXT: .LBB110_3:
|
||||
; PPC64LE-NEXT: stdcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
|
||||
@ -1979,16 +1957,15 @@ define void @test113(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
; PPC64LE-LABEL: test113:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB113_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB113_1:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB113_2:
|
||||
; PPC64LE-NEXT: ldarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpd 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB113_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB113_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB113_1
|
||||
; PPC64LE-NEXT: .LBB113_3:
|
||||
; PPC64LE-NEXT: stdcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
|
||||
@ -1999,16 +1976,15 @@ define void @test114(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
; PPC64LE-LABEL: test114:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB114_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB114_1:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: .LBB114_2:
|
||||
; PPC64LE-NEXT: ldarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpd 4, 6
|
||||
; PPC64LE-NEXT: beq 0, .LBB114_1
|
||||
; PPC64LE-NEXT: # %bb.3:
|
||||
; PPC64LE-NEXT: bne 0, .LBB114_3
|
||||
; PPC64LE-NEXT: # %bb.2:
|
||||
; PPC64LE-NEXT: stdcx. 5, 0, 3
|
||||
; PPC64LE-NEXT: beqlr 0
|
||||
; PPC64LE-NEXT: b .LBB114_1
|
||||
; PPC64LE-NEXT: .LBB114_3:
|
||||
; PPC64LE-NEXT: stdcx. 6, 0, 3
|
||||
; PPC64LE-NEXT: blr
|
||||
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire
|
||||
|
@ -298,14 +298,14 @@ body: |
|
||||
|
||||
bb.11.unreachable:
|
||||
|
||||
; CHECK: bb.1.for.body:
|
||||
; CHECK: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
|
||||
; CHECK: B %bb.2
|
||||
|
||||
; CHECK: bb.4.catch4:
|
||||
; CHECK: successors: %bb.11(0x7ffff800), %bb.6(0x00000800)
|
||||
; CHECK: B %bb.11
|
||||
|
||||
; CHECK: bb.1.for.body (align 4):
|
||||
; CHECK: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
|
||||
; CHECK: B %bb.2
|
||||
|
||||
; CHECK: bb.2..noexc:
|
||||
|
||||
; CHECK: bb.11.unreachable:
|
||||
|
@ -718,13 +718,14 @@ if.end:
|
||||
define void @func28(i32 signext %a) {
|
||||
; CHECK-LABEL: @func28
|
||||
; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]]
|
||||
; CHECK: .[[LABEL1:[A-Z0-9_]+]]:
|
||||
; CHECK: .[[LABEL2:[A-Z0-9_]+]]:
|
||||
; CHECK: cmpwi [[REG1]], [[REG2]]
|
||||
; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]]
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: bne 0, .[[LABEL2:[A-Z0-9_]+]]
|
||||
; CHECK: bne 0, .[[LABEL2]]
|
||||
; CHECK: bl dummy1
|
||||
; CHECK: .[[LABEL2]]:
|
||||
; CHECK: cmpwi [[REG1]], [[REG2]]
|
||||
; CHECK: bgt 0, .[[LABEL1]]
|
||||
; CHECK: b .[[LABEL2]]
|
||||
; CHECK: .[[LABEL1]]:
|
||||
; CHECK: blr
|
||||
entry:
|
||||
br label %do.body
|
||||
|
@ -24,8 +24,7 @@ define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompres
|
||||
; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
|
||||
; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
|
||||
; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
|
||||
; CHECK: b .[[LABEL1:[A-Z0-9_]+]]
|
||||
; CHECK: .[[LABEL1]]: # %for.cond
|
||||
; CHECK: .LBB0_2: # %for.cond
|
||||
; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
|
||||
; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha
|
||||
; CHECK: bctrl
|
||||
|
@ -75,8 +75,19 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) local_unnamed_
|
||||
; CHECK-P9-NEXT: lis r3, 21845
|
||||
; CHECK-P9-NEXT: add r28, r30, r29
|
||||
; CHECK-P9-NEXT: ori r27, r3, 21846
|
||||
; CHECK-P9-NEXT: b .LBB1_4
|
||||
; CHECK-P9-NEXT: .p2align 4
|
||||
; CHECK-P9-NEXT: .LBB1_1: # %while.cond
|
||||
; CHECK-P9-NEXT: .LBB1_1: # %sw.bb3
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: mulli r3, r30, 23
|
||||
; CHECK-P9-NEXT: .LBB1_2: # %sw.epilog
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: add r28, r3, r28
|
||||
; CHECK-P9-NEXT: .LBB1_3: # %sw.epilog
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: cmpwi r28, 1025
|
||||
; CHECK-P9-NEXT: bge cr0, .LBB1_7
|
||||
; CHECK-P9-NEXT: .LBB1_4: # %while.cond
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: extsw r3, r29
|
||||
; CHECK-P9-NEXT: bl bar
|
||||
@ -95,27 +106,16 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) local_unnamed_
|
||||
; CHECK-P9-NEXT: add r4, r4, r5
|
||||
; CHECK-P9-NEXT: subf r3, r4, r3
|
||||
; CHECK-P9-NEXT: cmplwi r3, 1
|
||||
; CHECK-P9-NEXT: beq cr0, .LBB1_4
|
||||
; CHECK-P9-NEXT: # %bb.2: # %while.cond
|
||||
; CHECK-P9-NEXT: beq cr0, .LBB1_1
|
||||
; CHECK-P9-NEXT: # %bb.5: # %while.cond
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: cmplwi r3, 0
|
||||
; CHECK-P9-NEXT: bne cr0, .LBB1_6
|
||||
; CHECK-P9-NEXT: # %bb.3: # %sw.bb
|
||||
; CHECK-P9-NEXT: bne cr0, .LBB1_3
|
||||
; CHECK-P9-NEXT: # %bb.6: # %sw.bb
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: mulli r3, r29, 13
|
||||
; CHECK-P9-NEXT: b .LBB1_5
|
||||
; CHECK-P9-NEXT: .p2align 4
|
||||
; CHECK-P9-NEXT: .LBB1_4: # %sw.bb3
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: mulli r3, r30, 23
|
||||
; CHECK-P9-NEXT: .LBB1_5: # %sw.epilog
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: add r28, r3, r28
|
||||
; CHECK-P9-NEXT: .LBB1_6: # %sw.epilog
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: cmpwi r28, 1025
|
||||
; CHECK-P9-NEXT: blt cr0, .LBB1_1
|
||||
; CHECK-P9-NEXT: # %bb.7: # %while.end
|
||||
; CHECK-P9-NEXT: b .LBB1_2
|
||||
; CHECK-P9-NEXT: .LBB1_7: # %while.end
|
||||
; CHECK-P9-NEXT: lis r3, -13108
|
||||
; CHECK-P9-NEXT: ori r3, r3, 52429
|
||||
; CHECK-P9-NEXT: mullw r3, r28, r3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -52,24 +52,32 @@ define i32 @test() nounwind {
|
||||
; RV32I-NEXT: lui s0, %hi(d)
|
||||
; RV32I-NEXT: lui s10, %hi(c)
|
||||
; RV32I-NEXT: lui s11, %hi(b)
|
||||
; RV32I-NEXT: .LBB0_2: # %for.body
|
||||
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV32I-NEXT: lw a1, %lo(l)(s2)
|
||||
; RV32I-NEXT: beqz a1, .LBB0_4
|
||||
; RV32I-NEXT: # %bb.3: # %if.then
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: bnez a1, .LBB0_4
|
||||
; RV32I-NEXT: j .LBB0_5
|
||||
; RV32I-NEXT: .LBB0_2: # %for.inc
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a0, %lo(a)(s9)
|
||||
; RV32I-NEXT: addi a0, a0, -1
|
||||
; RV32I-NEXT: sw a0, %lo(a)(s9)
|
||||
; RV32I-NEXT: beqz a0, .LBB0_11
|
||||
; RV32I-NEXT: # %bb.3: # %for.body
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a1, %lo(l)(s2)
|
||||
; RV32I-NEXT: beqz a1, .LBB0_5
|
||||
; RV32I-NEXT: .LBB0_4: # %if.then
|
||||
; RV32I-NEXT: lw a4, %lo(e)(s1)
|
||||
; RV32I-NEXT: lw a3, %lo(d)(s0)
|
||||
; RV32I-NEXT: lw a2, %lo(c)(s10)
|
||||
; RV32I-NEXT: lw a1, %lo(b)(s11)
|
||||
; RV32I-NEXT: addi a5, zero, 32
|
||||
; RV32I-NEXT: call foo
|
||||
; RV32I-NEXT: .LBB0_4: # %if.end
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: .LBB0_5: # %if.end
|
||||
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV32I-NEXT: lw a0, %lo(k)(s3)
|
||||
; RV32I-NEXT: beqz a0, .LBB0_6
|
||||
; RV32I-NEXT: # %bb.5: # %if.then3
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: beqz a0, .LBB0_7
|
||||
; RV32I-NEXT: # %bb.6: # %if.then3
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a4, %lo(f)(s8)
|
||||
; RV32I-NEXT: lw a3, %lo(e)(s1)
|
||||
; RV32I-NEXT: lw a2, %lo(d)(s0)
|
||||
@ -77,12 +85,12 @@ define i32 @test() nounwind {
|
||||
; RV32I-NEXT: lw a0, %lo(b)(s11)
|
||||
; RV32I-NEXT: addi a5, zero, 64
|
||||
; RV32I-NEXT: call foo
|
||||
; RV32I-NEXT: .LBB0_6: # %if.end5
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: .LBB0_7: # %if.end5
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a0, %lo(j)(s4)
|
||||
; RV32I-NEXT: beqz a0, .LBB0_8
|
||||
; RV32I-NEXT: # %bb.7: # %if.then7
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: beqz a0, .LBB0_9
|
||||
; RV32I-NEXT: # %bb.8: # %if.then7
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a4, %lo(g)(s7)
|
||||
; RV32I-NEXT: lw a3, %lo(f)(s8)
|
||||
; RV32I-NEXT: lw a2, %lo(e)(s1)
|
||||
@ -90,12 +98,12 @@ define i32 @test() nounwind {
|
||||
; RV32I-NEXT: lw a0, %lo(c)(s10)
|
||||
; RV32I-NEXT: addi a5, zero, 32
|
||||
; RV32I-NEXT: call foo
|
||||
; RV32I-NEXT: .LBB0_8: # %if.end9
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: .LBB0_9: # %if.end9
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a0, %lo(i)(s6)
|
||||
; RV32I-NEXT: beqz a0, .LBB0_10
|
||||
; RV32I-NEXT: # %bb.9: # %if.then11
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: beqz a0, .LBB0_2
|
||||
; RV32I-NEXT: # %bb.10: # %if.then11
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1
|
||||
; RV32I-NEXT: lw a4, %lo(h)(s5)
|
||||
; RV32I-NEXT: lw a3, %lo(g)(s7)
|
||||
; RV32I-NEXT: lw a2, %lo(f)(s8)
|
||||
@ -103,12 +111,7 @@ define i32 @test() nounwind {
|
||||
; RV32I-NEXT: lw a0, %lo(d)(s0)
|
||||
; RV32I-NEXT: addi a5, zero, 32
|
||||
; RV32I-NEXT: call foo
|
||||
; RV32I-NEXT: .LBB0_10: # %for.inc
|
||||
; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; RV32I-NEXT: lw a0, %lo(a)(s9)
|
||||
; RV32I-NEXT: addi a0, a0, -1
|
||||
; RV32I-NEXT: sw a0, %lo(a)(s9)
|
||||
; RV32I-NEXT: bnez a0, .LBB0_2
|
||||
; RV32I-NEXT: j .LBB0_2
|
||||
; RV32I-NEXT: .LBB0_11: # %for.end
|
||||
; RV32I-NEXT: addi a0, zero, 1
|
||||
; RV32I-NEXT: lw s11, 12(sp)
|
||||
|
@ -10,8 +10,9 @@ define i32 @C(i32 %x, i32* nocapture %y) #0 {
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: movs r2, #0
|
||||
; CHECK-NEXT: ldr r3, .LCPI0_0
|
||||
; CHECK-NEXT: b .LBB0_4
|
||||
; CHECK-NEXT: .LBB0_1:
|
||||
; CHECK-NEXT: cmp r2, #128
|
||||
; CHECK-NEXT: beq .LBB0_5
|
||||
; CHECK-NEXT: movs r4, #0
|
||||
; CHECK-NEXT: str r4, [r3, #8]
|
||||
; CHECK-NEXT: lsls r4, r2, #2
|
||||
@ -20,16 +21,15 @@ define i32 @C(i32 %x, i32* nocapture %y) #0 {
|
||||
; CHECK-NEXT: movs r5, #1
|
||||
; CHECK-NEXT: str r5, [r3, #12]
|
||||
; CHECK-NEXT: isb sy
|
||||
; CHECK-NEXT: .LBB0_2:
|
||||
; CHECK-NEXT: .LBB0_3:
|
||||
; CHECK-NEXT: ldr r5, [r3, #12]
|
||||
; CHECK-NEXT: cmp r5, #0
|
||||
; CHECK-NEXT: bne .LBB0_2
|
||||
; CHECK-NEXT: bne .LBB0_3
|
||||
; CHECK-NEXT: ldr r5, [r3, #4]
|
||||
; CHECK-NEXT: str r5, [r1, r4]
|
||||
; CHECK-NEXT: adds r2, r2, #1
|
||||
; CHECK-NEXT: .LBB0_4:
|
||||
; CHECK-NEXT: cmp r2, #128
|
||||
; CHECK-NEXT: bne .LBB0_1
|
||||
; CHECK-NEXT: b .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_5:
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
; CHECK-NEXT: .p2align 2
|
||||
|
@ -6,27 +6,31 @@ define hidden void @test() {
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: movs r0, #1
|
||||
; CHECK-NEXT: lsls r1, r0, #2
|
||||
; CHECK-NEXT: .LBB0_1: @ %switch
|
||||
; CHECK-NEXT: b .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_1: @ %bb2
|
||||
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: bne .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_2: @ %switch
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adr r2, .LJTI0_0
|
||||
; CHECK-NEXT: ldr r2, [r2, r1]
|
||||
; CHECK-NEXT: mov pc, r2
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: .p2align 2
|
||||
; CHECK-NEXT: .LJTI0_0:
|
||||
; CHECK-NEXT: .long .LBB0_5+1
|
||||
; CHECK-NEXT: .long .LBB0_6+1
|
||||
; CHECK-NEXT: .long .LBB0_4+1
|
||||
; CHECK-NEXT: .long .LBB0_6+1
|
||||
; CHECK-NEXT: .long .LBB0_5+1
|
||||
; CHECK-NEXT: .long .LBB0_3+1
|
||||
; CHECK-NEXT: .LBB0_3: @ %bb
|
||||
; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: bne .LBB0_5
|
||||
; CHECK-NEXT: .LBB0_4: @ %bb2
|
||||
; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: .LBB0_4: @ %switch
|
||||
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: b .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_5: @ %bb
|
||||
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: cmp r0, #0
|
||||
; CHECK-NEXT: beq .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_5: @ %dead
|
||||
; CHECK-NEXT: .LBB0_6: @ %dead
|
||||
entry:
|
||||
br label %switch
|
||||
|
||||
|
@ -82,14 +82,14 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
|
||||
; Check that we sink cold loop blocks after the hot loop body.
|
||||
; CHECK-LABEL: test_loop_cold_blocks:
|
||||
; CHECK: %entry
|
||||
; CHECK-NOT: .p2align
|
||||
; CHECK: %unlikely1
|
||||
; CHECK-NOT: .p2align
|
||||
; CHECK: %unlikely2
|
||||
; CHECK: .p2align
|
||||
; CHECK: %body1
|
||||
; CHECK: %body2
|
||||
; CHECK: %body3
|
||||
; CHECK-NOT: .p2align
|
||||
; CHECK: %unlikely1
|
||||
; CHECK-NOT: .p2align
|
||||
; CHECK: %unlikely2
|
||||
; CHECK: %exit
|
||||
|
||||
entry:
|
||||
@ -125,7 +125,7 @@ exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
!0 = !{!"branch_weights", i32 4, i32 64}
|
||||
!0 = !{!"branch_weights", i32 1, i32 64}
|
||||
|
||||
define i32 @test_loop_early_exits(i32 %i, i32* %a) {
|
||||
; Check that we sink early exit blocks out of loop bodies.
|
||||
@ -189,8 +189,8 @@ define i32 @test_loop_rotate(i32 %i, i32* %a) {
|
||||
; loop, eliminating unconditional branches to the top.
|
||||
; CHECK-LABEL: test_loop_rotate:
|
||||
; CHECK: %entry
|
||||
; CHECK: %body1
|
||||
; CHECK: %body0
|
||||
; CHECK: %body1
|
||||
; CHECK: %exit
|
||||
|
||||
entry:
|
||||
@ -957,16 +957,15 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
|
||||
; CHECK: %if.else
|
||||
; CHECK: %if.end10
|
||||
; Second rotated loop top
|
||||
; CHECK: .p2align
|
||||
; CHECK: %if.then24
|
||||
; CHECK: %while.cond.outer
|
||||
; Third rotated loop top
|
||||
; CHECK: .p2align
|
||||
; CHECK: %if.end20
|
||||
; CHECK: %while.cond
|
||||
; CHECK: %while.body
|
||||
; CHECK: %land.lhs.true
|
||||
; CHECK: %if.then19
|
||||
; CHECK: %if.end20
|
||||
; CHECK: %if.then24
|
||||
; CHECK: %if.then8
|
||||
; CHECK: ret
|
||||
|
||||
@ -1546,8 +1545,8 @@ define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) {
|
||||
; CHECK-LABEL: not_rotate_if_extra_branch_regression
|
||||
; CHECK: %.entry
|
||||
; CHECK: %.first_backedge
|
||||
; CHECK: %.slow
|
||||
; CHECK: %.second_header
|
||||
; CHECK: %.slow
|
||||
.entry:
|
||||
%sum.0 = shl nsw i32 %count, 1
|
||||
br label %.first_header
|
||||
|
@ -4,6 +4,11 @@
|
||||
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
|
||||
@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2]
|
||||
|
||||
; CHECK: %entry
|
||||
; CHECK: %bb
|
||||
; CHECK: %bb1
|
||||
; CHECK: %bb2
|
||||
|
||||
define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
|
||||
entry:
|
||||
%0 = load i32, i32* %rk, align 4 ; <i32> [#uses=1]
|
||||
@ -12,8 +17,6 @@ entry:
|
||||
%tmp15 = add i32 %r, -1 ; <i32> [#uses=1]
|
||||
%tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2]
|
||||
br label %bb
|
||||
; CHECK: jmp
|
||||
; CHECK-NEXT: align
|
||||
|
||||
bb: ; preds = %bb1, %entry
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3]
|
||||
|
@ -1,13 +1,12 @@
|
||||
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s
|
||||
|
||||
define void @foo() {
|
||||
; Test that when determining the edge probability from a node in an inner loop
|
||||
; to a node in an outer loop, the weights on edges in the inner loop should be
|
||||
; ignored if we are building the chain for the outer loop.
|
||||
; After moving the latch to the top of loop, there is no fall through from the
|
||||
; latch to outer loop.
|
||||
;
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: callq c
|
||||
; CHECK: callq b
|
||||
; CHECK: callq c
|
||||
|
||||
entry:
|
||||
%call = call zeroext i1 @a()
|
||||
|
@ -7,9 +7,9 @@ define i32 @bar(i32 %count) {
|
||||
; Later backedge1 and backedge2 is rotated before loop header.
|
||||
; CHECK-LABEL: bar
|
||||
; CHECK: %.entry
|
||||
; CHECK: %.header
|
||||
; CHECK: %.backedge1
|
||||
; CHECK: %.backedge2
|
||||
; CHECK: %.header
|
||||
; CHECK: %.exit
|
||||
.entry:
|
||||
%c = shl nsw i32 %count, 2
|
||||
|
@ -258,9 +258,12 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
|
||||
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
|
||||
; CHECK32-NEXT: xorl %edi, %edi # encoding: [0x31,0xff]
|
||||
; CHECK32-NEXT: incl %edi # encoding: [0x47]
|
||||
; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: .LBB3_2: # %for.body
|
||||
; CHECK32-NEXT: .LBB3_1: # %for.cond
|
||||
; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2]
|
||||
; CHECK32-NEXT: je .LBB3_13 # encoding: [0x74,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: # %bb.2: # %for.body
|
||||
; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
|
||||
; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A]
|
||||
@ -314,12 +317,9 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
|
||||
; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK32-NEXT: incl %eax # encoding: [0x40]
|
||||
; CHECK32-NEXT: decl %edx # encoding: [0x4a]
|
||||
; CHECK32-NEXT: .LBB3_1: # %for.cond
|
||||
; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2]
|
||||
; CHECK32-NEXT: jne .LBB3_2 # encoding: [0x75,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: # %bb.13:
|
||||
; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
|
||||
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
|
||||
; CHECK32-NEXT: .LBB3_13:
|
||||
; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
|
||||
; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
|
||||
; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A]
|
||||
@ -369,56 +369,59 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
|
||||
; CHECK64-NEXT: .cfi_adjust_cfa_offset 8
|
||||
; CHECK64-NEXT: popq %r8 # encoding: [0x41,0x58]
|
||||
; CHECK64-NEXT: .cfi_adjust_cfa_offset -8
|
||||
; CHECK64-NEXT: jmp .LBB3_11 # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB3_1: # %for.body
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
|
||||
; CHECK64-NEXT: je .LBB3_9 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB3_1: # %for.cond
|
||||
; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
|
||||
; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.2: # %for.body
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
|
||||
; CHECK64-NEXT: je .LBB3_7 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.3: # %for.body
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
|
||||
; CHECK64-NEXT: jne .LBB3_10 # encoding: [0x75,A]
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
|
||||
; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.4: # %sw.bb
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: # %bb.3: # %for.body
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
|
||||
; CHECK64-NEXT: je .LBB3_8 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.4: # %for.body
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
|
||||
; CHECK64-NEXT: jne .LBB3_11 # encoding: [0x75,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.5: # %sw.bb
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17]
|
||||
; CHECK64-NEXT: cmpl $43, %edx # encoding: [0x83,0xfa,0x2b]
|
||||
; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
|
||||
; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.5: # %sw.bb
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.6: # %sw.bb
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d]
|
||||
; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
|
||||
; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.6: # %if.else
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.7: # %if.else
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: addl $-48, %edx # encoding: [0x83,0xc2,0xd0]
|
||||
; CHECK64-NEXT: cmpl $10, %edx # encoding: [0x83,0xfa,0x0a]
|
||||
; CHECK64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB3_7: # %sw.bb14
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: jmp .LBB3_9 # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB3_8: # %sw.bb14
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
|
||||
; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
|
||||
; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
|
||||
; CHECK64-NEXT: .LBB3_8: # %if.else
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: .LBB3_9: # %if.else
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
|
||||
; CHECK64-NEXT: jb .LBB3_10 # encoding: [0x72,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: jb .LBB3_11 # encoding: [0x72,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: jmp .LBB3_13 # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB3_9: # %sw.bb22
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: .LBB3_10: # %sw.bb22
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
|
||||
; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
|
||||
; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
|
||||
@ -426,16 +429,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
|
||||
; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
|
||||
; CHECK64-NEXT: # encoding: [0x73,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: .LBB3_10: # %for.inc
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
|
||||
; CHECK64-NEXT: .LBB3_11: # %for.inc
|
||||
; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; CHECK64-NEXT: incq %rdi # encoding: [0x48,0xff,0xc7]
|
||||
; CHECK64-NEXT: decq %rax # encoding: [0x48,0xff,0xc8]
|
||||
; CHECK64-NEXT: .LBB3_11: # %for.cond
|
||||
; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
|
||||
; CHECK64-NEXT: jne .LBB3_1 # encoding: [0x75,A]
|
||||
; CHECK64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
|
||||
; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
|
||||
; CHECK64-NEXT: # %bb.12:
|
||||
; CHECK64-NEXT: .LBB3_12:
|
||||
; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
|
||||
; CHECK64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
|
||||
; CHECK64-NEXT: # kill: def $al killed $al killed $eax
|
||||
@ -451,51 +451,54 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
|
||||
; WIN64-NEXT: movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8]
|
||||
; WIN64-NEXT: leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01]
|
||||
; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
|
||||
; WIN64-NEXT: jmp .LBB3_10 # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB3_1: # %for.body
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
|
||||
; WIN64-NEXT: je .LBB3_8 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB3_1: # %for.cond
|
||||
; WIN64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
|
||||
; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.2: # %for.body
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
|
||||
; WIN64-NEXT: je .LBB3_6 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_6-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.3: # %for.body
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
|
||||
; WIN64-NEXT: jne .LBB3_9 # encoding: [0x75,A]
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
|
||||
; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.4: # %sw.bb
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: # %bb.3: # %for.body
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
|
||||
; WIN64-NEXT: je .LBB3_7 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.4: # %for.body
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
|
||||
; WIN64-NEXT: jne .LBB3_10 # encoding: [0x75,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.5: # %sw.bb
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
|
||||
; WIN64-NEXT: cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b]
|
||||
; WIN64-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
|
||||
; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.5: # %sw.bb
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.6: # %sw.bb
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d]
|
||||
; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: jmp .LBB3_7 # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB3_6: # %sw.bb14
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB3_7: # %sw.bb14
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
|
||||
; WIN64-NEXT: .LBB3_7: # %if.else
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: .LBB3_8: # %if.else
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
|
||||
; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
|
||||
; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
|
||||
; WIN64-NEXT: jb .LBB3_9 # encoding: [0x72,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: jb .LBB3_10 # encoding: [0x72,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: jmp .LBB3_12 # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB3_8: # %sw.bb22
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: .LBB3_9: # %sw.bb22
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
|
||||
; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
|
||||
; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
|
||||
@ -503,16 +506,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
|
||||
; WIN64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
|
||||
; WIN64-NEXT: # encoding: [0x73,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: .LBB3_9: # %for.inc
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
|
||||
; WIN64-NEXT: .LBB3_10: # %for.inc
|
||||
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
|
||||
; WIN64-NEXT: incq %rcx # encoding: [0x48,0xff,0xc1]
|
||||
; WIN64-NEXT: decq %r8 # encoding: [0x49,0xff,0xc8]
|
||||
; WIN64-NEXT: .LBB3_10: # %for.cond
|
||||
; WIN64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
|
||||
; WIN64-NEXT: jne .LBB3_1 # encoding: [0x75,A]
|
||||
; WIN64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
|
||||
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
|
||||
; WIN64-NEXT: # %bb.11:
|
||||
; WIN64-NEXT: .LBB3_11:
|
||||
; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
|
||||
; WIN64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
|
||||
; WIN64-NEXT: # kill: def $al killed $al killed $eax
|
||||
|
@ -7,12 +7,14 @@
|
||||
; order to avoid a branch within the loop.
|
||||
|
||||
; CHECK-LABEL: simple:
|
||||
; CHECK: jmp .LBB0_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB0_2:
|
||||
; CHECK-NEXT: callq loop_latch
|
||||
; CHECK: align
|
||||
; CHECK-NEXT: .LBB0_1:
|
||||
; CHECK-NEXT: callq loop_header
|
||||
; CHECK: js .LBB0_3
|
||||
; CHECK-NEXT: callq loop_latch
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3:
|
||||
; CHECK-NEXT: callq exit
|
||||
|
||||
define void @simple() nounwind {
|
||||
entry:
|
||||
@ -75,17 +77,21 @@ exit:
|
||||
; CHECK-LABEL: yet_more_involved:
|
||||
; CHECK: jmp .LBB2_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB2_5:
|
||||
; CHECK-NEXT: callq block_a_true_func
|
||||
; CHECK-NEXT: callq block_a_merge_func
|
||||
; CHECK-NEXT: .LBB2_1:
|
||||
|
||||
; CHECK: .LBB2_1:
|
||||
; CHECK-NEXT: callq body
|
||||
;
|
||||
; LBB2_4
|
||||
; CHECK: callq bar99
|
||||
; CHECK-NEXT: callq get
|
||||
; CHECK-NEXT: cmpl $2, %eax
|
||||
; CHECK-NEXT: jge .LBB2_2
|
||||
; CHECK-NEXT: callq bar99
|
||||
; CHECK-NEXT: callq get
|
||||
; CHECK-NEXT: cmpl $2999, %eax
|
||||
; CHECK-NEXT: jle .LBB2_5
|
||||
; CHECK-NEXT: jg .LBB2_6
|
||||
; CHECK-NEXT: callq block_a_true_func
|
||||
; CHECK-NEXT: callq block_a_merge_func
|
||||
; CHECK-NEXT: jmp .LBB2_1
|
||||
; CHECK-NEXT: align
|
||||
; CHECK-NEXT: .LBB2_6:
|
||||
; CHECK-NEXT: callq block_a_false_func
|
||||
; CHECK-NEXT: callq block_a_merge_func
|
||||
; CHECK-NEXT: jmp .LBB2_1
|
||||
@ -201,12 +207,12 @@ block102:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: check_minsize:
|
||||
; CHECK: jmp .LBB4_1
|
||||
; CHECK-NOT: align
|
||||
; CHECK-NEXT: .LBB4_2:
|
||||
; CHECK-NEXT: callq loop_latch
|
||||
; CHECK-NEXT: .LBB4_1:
|
||||
; CHECK: .LBB4_1:
|
||||
; CHECK-NEXT: callq loop_header
|
||||
; CHECK: callq loop_latch
|
||||
; CHECK: .LBB4_3:
|
||||
; CHECK: callq exit
|
||||
|
||||
|
||||
define void @check_minsize() minsize nounwind {
|
||||
|
120
test/CodeGen/X86/loop-rotate.ll
Normal file
120
test/CodeGen/X86/loop-rotate.ll
Normal file
@ -0,0 +1,120 @@
|
||||
; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
|
||||
|
||||
; Don't rotate the loop if the number of fall through to exit is not larger
|
||||
; than the number of fall through to header.
|
||||
define void @no_rotate() {
|
||||
; CHECK-LABEL: no_rotate
|
||||
; CHECK: %entry
|
||||
; CHECK: %header
|
||||
; CHECK: %middle
|
||||
; CHECK: %latch1
|
||||
; CHECK: %latch2
|
||||
; CHECK: %end
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%val1 = call i1 @foo()
|
||||
br i1 %val1, label %middle, label %end
|
||||
|
||||
middle:
|
||||
%val2 = call i1 @foo()
|
||||
br i1 %val2, label %latch1, label %end
|
||||
|
||||
latch1:
|
||||
%val3 = call i1 @foo()
|
||||
br i1 %val3, label %latch2, label %header
|
||||
|
||||
latch2:
|
||||
%val4 = call i1 @foo()
|
||||
br label %header
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @do_rotate() {
|
||||
; CHECK-LABEL: do_rotate
|
||||
; CHECK: %entry
|
||||
; CHECK: %then
|
||||
; CHECK: %else
|
||||
; CHECK: %latch1
|
||||
; CHECK: %latch2
|
||||
; CHECK: %header
|
||||
; CHECK: %end
|
||||
entry:
|
||||
%val0 = call i1 @foo()
|
||||
br i1 %val0, label %then, label %else
|
||||
|
||||
then:
|
||||
call void @a()
|
||||
br label %header
|
||||
|
||||
else:
|
||||
call void @b()
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%val1 = call i1 @foo()
|
||||
br i1 %val1, label %latch1, label %end
|
||||
|
||||
latch1:
|
||||
%val3 = call i1 @foo()
|
||||
br i1 %val3, label %latch2, label %header
|
||||
|
||||
latch2:
|
||||
%val4 = call i1 @foo()
|
||||
br label %header
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The loop structure is same as in @no_rotate, but the loop header's predecessor
|
||||
; doesn't fall through to it, so it should be rotated to get exit fall through.
|
||||
define void @do_rotate2() {
|
||||
; CHECK-LABEL: do_rotate2
|
||||
; CHECK: %entry
|
||||
; CHECK: %then
|
||||
; CHECK: %middle
|
||||
; CHECK: %latch1
|
||||
; CHECK: %latch2
|
||||
; CHECK: %header
|
||||
; CHECK: %exit
|
||||
entry:
|
||||
%val0 = call i1 @foo()
|
||||
br i1 %val0, label %then, label %header, !prof !1
|
||||
|
||||
then:
|
||||
call void @a()
|
||||
br label %end
|
||||
|
||||
header:
|
||||
%val1 = call i1 @foo()
|
||||
br i1 %val1, label %middle, label %exit
|
||||
|
||||
middle:
|
||||
%val2 = call i1 @foo()
|
||||
br i1 %val2, label %latch1, label %exit
|
||||
|
||||
latch1:
|
||||
%val3 = call i1 @foo()
|
||||
br i1 %val3, label %latch2, label %header
|
||||
|
||||
latch2:
|
||||
%val4 = call i1 @foo()
|
||||
br label %header
|
||||
|
||||
exit:
|
||||
call void @b()
|
||||
br label %end
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i1 @foo()
|
||||
declare void @a()
|
||||
declare void @b()
|
||||
|
||||
!1 = !{!"branch_weights", i32 10, i32 1}
|
@ -21,22 +21,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
|
||||
; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8
|
||||
; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10
|
||||
; GENERIC-NEXT: movq %rcx, %r11
|
||||
; GENERIC-NEXT: jmp LBB0_1
|
||||
; GENERIC-NEXT: .p2align 4, 0x90
|
||||
; GENERIC-NEXT: LBB0_2: ## %bb1
|
||||
; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; GENERIC-NEXT: movl %edi, %ebx
|
||||
; GENERIC-NEXT: shrl $16, %ebx
|
||||
; GENERIC-NEXT: movzbl %bl, %ebx
|
||||
; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax
|
||||
; GENERIC-NEXT: xorl -4(%r14), %eax
|
||||
; GENERIC-NEXT: shrl $24, %edi
|
||||
; GENERIC-NEXT: movzbl %bpl, %ebx
|
||||
; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx
|
||||
; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx
|
||||
; GENERIC-NEXT: xorl (%r14), %ebx
|
||||
; GENERIC-NEXT: decq %r11
|
||||
; GENERIC-NEXT: addq $16, %r14
|
||||
; GENERIC-NEXT: LBB0_1: ## %bb
|
||||
; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; GENERIC-NEXT: movzbl %al, %edi
|
||||
@ -56,8 +41,23 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
|
||||
; GENERIC-NEXT: shrl $24, %eax
|
||||
; GENERIC-NEXT: movl (%r9,%rax,4), %eax
|
||||
; GENERIC-NEXT: testq %r11, %r11
|
||||
; GENERIC-NEXT: jne LBB0_2
|
||||
; GENERIC-NEXT: ## %bb.3: ## %bb2
|
||||
; GENERIC-NEXT: je LBB0_3
|
||||
; GENERIC-NEXT: ## %bb.2: ## %bb1
|
||||
; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; GENERIC-NEXT: movl %edi, %ebx
|
||||
; GENERIC-NEXT: shrl $16, %ebx
|
||||
; GENERIC-NEXT: movzbl %bl, %ebx
|
||||
; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax
|
||||
; GENERIC-NEXT: xorl -4(%r14), %eax
|
||||
; GENERIC-NEXT: shrl $24, %edi
|
||||
; GENERIC-NEXT: movzbl %bpl, %ebx
|
||||
; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx
|
||||
; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx
|
||||
; GENERIC-NEXT: xorl (%r14), %ebx
|
||||
; GENERIC-NEXT: decq %r11
|
||||
; GENERIC-NEXT: addq $16, %r14
|
||||
; GENERIC-NEXT: jmp LBB0_1
|
||||
; GENERIC-NEXT: LBB0_3: ## %bb2
|
||||
; GENERIC-NEXT: shlq $4, %rcx
|
||||
; GENERIC-NEXT: andl $-16777216, %eax ## imm = 0xFF000000
|
||||
; GENERIC-NEXT: movl %edi, %ebx
|
||||
@ -105,21 +105,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
|
||||
; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10
|
||||
; ATOM-NEXT: decl %ecx
|
||||
; ATOM-NEXT: movq %rcx, %r11
|
||||
; ATOM-NEXT: jmp LBB0_1
|
||||
; ATOM-NEXT: .p2align 4, 0x90
|
||||
; ATOM-NEXT: LBB0_2: ## %bb1
|
||||
; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; ATOM-NEXT: shrl $16, %eax
|
||||
; ATOM-NEXT: shrl $24, %edi
|
||||
; ATOM-NEXT: decq %r11
|
||||
; ATOM-NEXT: movzbl %al, %ebp
|
||||
; ATOM-NEXT: movzbl %bl, %eax
|
||||
; ATOM-NEXT: movl (%r10,%rax,4), %eax
|
||||
; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
|
||||
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
|
||||
; ATOM-NEXT: xorl -4(%r14), %r15d
|
||||
; ATOM-NEXT: xorl (%r14), %eax
|
||||
; ATOM-NEXT: addq $16, %r14
|
||||
; ATOM-NEXT: LBB0_1: ## %bb
|
||||
; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; ATOM-NEXT: movl %eax, %edi
|
||||
@ -140,8 +126,22 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
|
||||
; ATOM-NEXT: movl (%r9,%rax,4), %r15d
|
||||
; ATOM-NEXT: testq %r11, %r11
|
||||
; ATOM-NEXT: movl %edi, %eax
|
||||
; ATOM-NEXT: jne LBB0_2
|
||||
; ATOM-NEXT: ## %bb.3: ## %bb2
|
||||
; ATOM-NEXT: je LBB0_3
|
||||
; ATOM-NEXT: ## %bb.2: ## %bb1
|
||||
; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; ATOM-NEXT: shrl $16, %eax
|
||||
; ATOM-NEXT: shrl $24, %edi
|
||||
; ATOM-NEXT: decq %r11
|
||||
; ATOM-NEXT: movzbl %al, %ebp
|
||||
; ATOM-NEXT: movzbl %bl, %eax
|
||||
; ATOM-NEXT: movl (%r10,%rax,4), %eax
|
||||
; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
|
||||
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
|
||||
; ATOM-NEXT: xorl -4(%r14), %r15d
|
||||
; ATOM-NEXT: xorl (%r14), %eax
|
||||
; ATOM-NEXT: addq $16, %r14
|
||||
; ATOM-NEXT: jmp LBB0_1
|
||||
; ATOM-NEXT: LBB0_3: ## %bb2
|
||||
; ATOM-NEXT: shrl $16, %eax
|
||||
; ATOM-NEXT: shrl $8, %edi
|
||||
; ATOM-NEXT: movzbl %bl, %ebp
|
||||
|
@ -1,11 +1,11 @@
|
||||
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux --force-precise-rotation-cost < %s | FileCheck %s
|
||||
; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s
|
||||
|
||||
; The block latch should be moved before header.
|
||||
;CHECK-LABEL: test1:
|
||||
;CHECK: %latch
|
||||
;CHECK: %header
|
||||
;CHECK: %false
|
||||
define i32 @test1(i32* %p) !prof !0 {
|
||||
define i32 @test1(i32* %p) {
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
@ -39,7 +39,7 @@ exit:
|
||||
;CHECK: %latch
|
||||
;CHECK: %header
|
||||
;CHECK: %false
|
||||
define i32 @test2(i32* %p) !prof !0 {
|
||||
define i32 @test2(i32* %p) {
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
@ -107,7 +107,7 @@ exit:
|
||||
;CHECK: %latch
|
||||
;CHECK: %header
|
||||
;CHECK: %false
|
||||
define i32 @test3(i32* %p) !prof !0 {
|
||||
define i32 @test3(i32* %p) {
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
@ -173,9 +173,9 @@ exit:
|
||||
;CHECK: %header
|
||||
;CHECK: %true
|
||||
;CHECK: %latch
|
||||
;CHECK: %exit
|
||||
;CHECK: %false
|
||||
define i32 @test4(i32 %t, i32* %p) !prof !0 {
|
||||
;CHECK: %exit
|
||||
define i32 @test4(i32 %t, i32* %p) {
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
@ -207,7 +207,6 @@ exit:
|
||||
ret i32 %count4
|
||||
}
|
||||
|
||||
!0 = !{!"function_entry_count", i32 1000}
|
||||
!1 = !{!"branch_weights", i32 100, i32 1}
|
||||
!2 = !{!"branch_weights", i32 16, i32 16}
|
||||
!3 = !{!"branch_weights", i32 51, i32 49}
|
||||
@ -217,7 +216,7 @@ exit:
|
||||
;CHECK: %entry
|
||||
;CHECK: %header
|
||||
;CHECK: %latch
|
||||
define void @test5(i32* %p) !prof !0 {
|
||||
define void @test5(i32* %p) {
|
||||
entry:
|
||||
br label %header
|
||||
|
||||
@ -237,3 +236,4 @@ latch:
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -5,9 +5,13 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %body
|
||||
; CHECK-NEXT: .LBB0_1: # %loop
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %r9
|
||||
; CHECK-NEXT: cmpq %rcx, %r9
|
||||
; CHECK-NEXT: je .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %body
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl $1, (%rdx,%r9,4)
|
||||
; CHECK-NEXT: movzbl (%rdi,%r9,4), %r8d
|
||||
@ -17,12 +21,8 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) {
|
||||
; CHECK-NEXT: movl %eax, (%rdi,%r9,4)
|
||||
; CHECK-NEXT: incq %r9
|
||||
; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: .LBB0_1: # %loop
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %r9
|
||||
; CHECK-NEXT: cmpq %rcx, %r9
|
||||
; CHECK-NEXT: jne .LBB0_2
|
||||
; CHECK-NEXT: # %bb.3: # %endloop
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %endloop
|
||||
; CHECK-NEXT: retq
|
||||
%i = alloca i64
|
||||
store i64 0, i64* %i
|
||||
|
@ -103,6 +103,34 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: xorl %r13d, %r13d
|
||||
; CHECK-NEXT: jmp LBB0_13
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_20: ## %sw.bb256
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: decl %r15d
|
||||
; CHECK-NEXT: testl %r15d, %r15d
|
||||
; CHECK-NEXT: movl %r13d, %r14d
|
||||
; CHECK-NEXT: jle LBB0_22
|
||||
; CHECK-NEXT: LBB0_13: ## %while.body200
|
||||
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: ## Child Loop BB0_30 Depth 2
|
||||
; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
|
||||
; CHECK-NEXT: leal -268(%r14), %eax
|
||||
; CHECK-NEXT: cmpl $105, %eax
|
||||
; CHECK-NEXT: ja LBB0_14
|
||||
; CHECK-NEXT: ## %bb.56: ## %while.body200
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movslq (%rdi,%rax,4), %rax
|
||||
; CHECK-NEXT: addq %rdi, %rax
|
||||
; CHECK-NEXT: jmpq *%rax
|
||||
; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: jne LBB0_21
|
||||
; CHECK-NEXT: jmp LBB0_55
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_14: ## %while.body200
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: leal 1(%r14), %eax
|
||||
@ -118,12 +146,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $1, %r13d
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: jne LBB0_21
|
||||
; CHECK-NEXT: jmp LBB0_55
|
||||
; CHECK-NEXT: LBB0_26: ## %sw.bb474
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
@ -137,30 +159,52 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: ## implicit-def: $rax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jns LBB0_30
|
||||
; CHECK-NEXT: jmp LBB0_55
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_29: ## %land.rhs485
|
||||
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
|
||||
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
|
||||
; CHECK-NEXT: leaq 1(%r12), %rax
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: je LBB0_33
|
||||
; CHECK-NEXT: ## %bb.29: ## %land.rhs485
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: js LBB0_55
|
||||
; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
|
||||
; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780
|
||||
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
|
||||
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: movq %rax, %r12
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: jne LBB0_32
|
||||
; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
|
||||
; CHECK-NEXT: movl $256, %esi ## imm = 0x100
|
||||
; CHECK-NEXT: callq ___maskrune
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: je LBB0_34
|
||||
; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
|
||||
; CHECK-NEXT: leaq 1(%r12), %rax
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: jne LBB0_29
|
||||
; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit
|
||||
; CHECK-NEXT: jne LBB0_32
|
||||
; CHECK-NEXT: jmp LBB0_34
|
||||
; CHECK-NEXT: LBB0_45: ## %sw.bb1134
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
||||
; CHECK-NEXT: cmpq %rax, %rcx
|
||||
; CHECK-NEXT: jb LBB0_55
|
||||
; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_19: ## %sw.bb243
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $2, %r13d
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_40: ## %sw.bb566
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $20, %r13d
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: incq %r12
|
||||
; CHECK-NEXT: LBB0_34: ## %if.end517
|
||||
@ -199,47 +243,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_45: ## %sw.bb1134
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
||||
; CHECK-NEXT: cmpq %rax, %rcx
|
||||
; CHECK-NEXT: jb LBB0_55
|
||||
; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_19: ## %sw.bb243
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $2, %r13d
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: LBB0_40: ## %sw.bb566
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl $20, %r13d
|
||||
; CHECK-NEXT: jmp LBB0_21
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_13: ## %while.body200
|
||||
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: ## Child Loop BB0_29 Depth 2
|
||||
; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
|
||||
; CHECK-NEXT: leal -268(%r14), %eax
|
||||
; CHECK-NEXT: cmpl $105, %eax
|
||||
; CHECK-NEXT: ja LBB0_14
|
||||
; CHECK-NEXT: ## %bb.56: ## %while.body200
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movslq (%rdi,%rax,4), %rax
|
||||
; CHECK-NEXT: addq %rdi, %rax
|
||||
; CHECK-NEXT: jmpq *%rax
|
||||
; CHECK-NEXT: LBB0_20: ## %sw.bb256
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: movl %r14d, %r13d
|
||||
; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
|
||||
; CHECK-NEXT: decl %r15d
|
||||
; CHECK-NEXT: testl %r15d, %r15d
|
||||
; CHECK-NEXT: movl %r13d, %r14d
|
||||
; CHECK-NEXT: jg LBB0_13
|
||||
; CHECK-NEXT: jmp LBB0_22
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_42: ## %while.cond864
|
||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
|
@ -85,25 +85,36 @@ define i32 @test_branches_order() uwtable ssp {
|
||||
; CHECK-NEXT: jg LBB0_16
|
||||
; CHECK-NEXT: LBB0_9: ## %for.cond18.preheader
|
||||
; CHECK-NEXT: ## =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: ## Child Loop BB0_10 Depth 2
|
||||
; CHECK-NEXT: ## Child Loop BB0_11 Depth 2
|
||||
; CHECK-NEXT: ## Child Loop BB0_12 Depth 3
|
||||
; CHECK-NEXT: movq %rcx, %rdx
|
||||
; CHECK-NEXT: xorl %esi, %esi
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7
|
||||
; CHECK-NEXT: jle LBB0_11
|
||||
; CHECK-NEXT: jmp LBB0_15
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_10: ## %for.cond18
|
||||
; CHECK-NEXT: LBB0_14: ## %exit
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2
|
||||
; CHECK-NEXT: addq %rsi, %rbp
|
||||
; CHECK-NEXT: incq %rdi
|
||||
; CHECK-NEXT: decq %rsi
|
||||
; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9
|
||||
; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18
|
||||
; CHECK-NEXT: jne LBB0_5
|
||||
; CHECK-NEXT: ## %bb.10: ## %for.cond18
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2
|
||||
; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7
|
||||
; CHECK-NEXT: jg LBB0_15
|
||||
; CHECK-NEXT: LBB0_11: ## %for.body20
|
||||
; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
|
||||
; CHECK-NEXT: ## => This Loop Header: Depth=2
|
||||
; CHECK-NEXT: ## Child Loop BB0_12 Depth 3
|
||||
; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7
|
||||
; CHECK-NEXT: jg LBB0_15
|
||||
; CHECK-NEXT: ## %bb.11: ## %for.body20
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2
|
||||
; CHECK-NEXT: movq $-1000, %rbp ## imm = 0xFC18
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_12: ## %do.body.i
|
||||
; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
|
||||
; CHECK-NEXT: ## Parent Loop BB0_10 Depth=2
|
||||
; CHECK-NEXT: ## Parent Loop BB0_11 Depth=2
|
||||
; CHECK-NEXT: ## => This Inner Loop Header: Depth=3
|
||||
; CHECK-NEXT: cmpb $120, 1000(%rdx,%rbp)
|
||||
; CHECK-NEXT: je LBB0_14
|
||||
@ -111,16 +122,6 @@ define i32 @test_branches_order() uwtable ssp {
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_12 Depth=3
|
||||
; CHECK-NEXT: incq %rbp
|
||||
; CHECK-NEXT: jne LBB0_12
|
||||
; CHECK-NEXT: jmp LBB0_5
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_14: ## %exit
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2
|
||||
; CHECK-NEXT: addq %rsi, %rbp
|
||||
; CHECK-NEXT: incq %rdi
|
||||
; CHECK-NEXT: decq %rsi
|
||||
; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9
|
||||
; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18
|
||||
; CHECK-NEXT: je LBB0_10
|
||||
; CHECK-NEXT: LBB0_5: ## %if.then
|
||||
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
|
||||
; CHECK-NEXT: callq _puts
|
||||
|
@ -215,10 +215,7 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp
|
||||
; X64-NEXT: movl %esi, %ebp
|
||||
; X64-NEXT: cmovneq %r15, %rax
|
||||
; X64-NEXT: xorl %ebx, %ebx
|
||||
; X64-NEXT: jmp .LBB2_3
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB2_6: # in Loop: Header=BB2_3 Depth=1
|
||||
; X64-NEXT: cmovgeq %r15, %rax
|
||||
; X64-NEXT: .LBB2_3: # %l.header
|
||||
; X64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X64-NEXT: movslq (%r12), %rcx
|
||||
@ -237,8 +234,11 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp
|
||||
; X64-NEXT: cmovneq %r15, %rax
|
||||
; X64-NEXT: incl %ebx
|
||||
; X64-NEXT: cmpl %ebp, %ebx
|
||||
; X64-NEXT: jl .LBB2_6
|
||||
; X64-NEXT: # %bb.4:
|
||||
; X64-NEXT: jge .LBB2_4
|
||||
; X64-NEXT: # %bb.6: # in Loop: Header=BB2_3 Depth=1
|
||||
; X64-NEXT: cmovgeq %r15, %rax
|
||||
; X64-NEXT: jmp .LBB2_3
|
||||
; X64-NEXT: .LBB2_4:
|
||||
; X64-NEXT: cmovlq %r15, %rax
|
||||
; X64-NEXT: .LBB2_5: # %exit
|
||||
; X64-NEXT: shlq $47, %rax
|
||||
@ -328,20 +328,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
|
||||
; X64-NEXT: xorl %r13d, %r13d
|
||||
; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: testl %r15d, %r15d
|
||||
; X64-NEXT: jg .LBB3_5
|
||||
; X64-NEXT: jmp .LBB3_4
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB3_12:
|
||||
; X64-NEXT: cmovgeq %rbp, %rax
|
||||
; X64-NEXT: testl %r15d, %r15d
|
||||
; X64-NEXT: jle .LBB3_4
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB3_5: # %l2.header.preheader
|
||||
; X64-NEXT: cmovleq %rbp, %rax
|
||||
; X64-NEXT: xorl %r15d, %r15d
|
||||
; X64-NEXT: jmp .LBB3_6
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB3_11: # in Loop: Header=BB3_6 Depth=1
|
||||
; X64-NEXT: cmovgeq %rbp, %rax
|
||||
; X64-NEXT: .LBB3_6: # %l2.header
|
||||
; X64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X64-NEXT: movslq (%rbx), %rcx
|
||||
@ -360,8 +352,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
|
||||
; X64-NEXT: cmovneq %rbp, %rax
|
||||
; X64-NEXT: incl %r15d
|
||||
; X64-NEXT: cmpl %r12d, %r15d
|
||||
; X64-NEXT: jl .LBB3_11
|
||||
; X64-NEXT: # %bb.7:
|
||||
; X64-NEXT: jge .LBB3_7
|
||||
; X64-NEXT: # %bb.11: # in Loop: Header=BB3_6 Depth=1
|
||||
; X64-NEXT: cmovgeq %rbp, %rax
|
||||
; X64-NEXT: jmp .LBB3_6
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB3_7:
|
||||
; X64-NEXT: cmovlq %rbp, %rax
|
||||
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
|
||||
; X64-NEXT: jmp .LBB3_8
|
||||
@ -385,8 +381,13 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
|
||||
; X64-NEXT: cmovneq %rbp, %rax
|
||||
; X64-NEXT: incl %r13d
|
||||
; X64-NEXT: cmpl %r15d, %r13d
|
||||
; X64-NEXT: jl .LBB3_12
|
||||
; X64-NEXT: # %bb.9:
|
||||
; X64-NEXT: jge .LBB3_9
|
||||
; X64-NEXT: # %bb.12:
|
||||
; X64-NEXT: cmovgeq %rbp, %rax
|
||||
; X64-NEXT: testl %r15d, %r15d
|
||||
; X64-NEXT: jg .LBB3_5
|
||||
; X64-NEXT: jmp .LBB3_4
|
||||
; X64-NEXT: .LBB3_9:
|
||||
; X64-NEXT: cmovlq %rbp, %rax
|
||||
; X64-NEXT: .LBB3_10: # %exit
|
||||
; X64-NEXT: shlq $47, %rax
|
||||
@ -418,7 +419,17 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
|
||||
; X64-LFENCE-NEXT: movl %esi, %r15d
|
||||
; X64-LFENCE-NEXT: lfence
|
||||
; X64-LFENCE-NEXT: xorl %r12d, %r12d
|
||||
; X64-LFENCE-NEXT: jmp .LBB3_2
|
||||
; X64-LFENCE-NEXT: .p2align 4, 0x90
|
||||
; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch
|
||||
; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1
|
||||
; X64-LFENCE-NEXT: lfence
|
||||
; X64-LFENCE-NEXT: movslq (%rbx), %rax
|
||||
; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi
|
||||
; X64-LFENCE-NEXT: callq sink
|
||||
; X64-LFENCE-NEXT: incl %r12d
|
||||
; X64-LFENCE-NEXT: cmpl %r15d, %r12d
|
||||
; X64-LFENCE-NEXT: jge .LBB3_6
|
||||
; X64-LFENCE-NEXT: .LBB3_2: # %l1.header
|
||||
; X64-LFENCE-NEXT: # =>This Loop Header: Depth=1
|
||||
; X64-LFENCE-NEXT: # Child Loop BB3_4 Depth 2
|
||||
@ -440,15 +451,7 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt
|
||||
; X64-LFENCE-NEXT: incl %ebp
|
||||
; X64-LFENCE-NEXT: cmpl %r13d, %ebp
|
||||
; X64-LFENCE-NEXT: jl .LBB3_4
|
||||
; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch
|
||||
; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1
|
||||
; X64-LFENCE-NEXT: lfence
|
||||
; X64-LFENCE-NEXT: movslq (%rbx), %rax
|
||||
; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi
|
||||
; X64-LFENCE-NEXT: callq sink
|
||||
; X64-LFENCE-NEXT: incl %r12d
|
||||
; X64-LFENCE-NEXT: cmpl %r15d, %r12d
|
||||
; X64-LFENCE-NEXT: jl .LBB3_2
|
||||
; X64-LFENCE-NEXT: jmp .LBB3_5
|
||||
; X64-LFENCE-NEXT: .LBB3_6: # %exit
|
||||
; X64-LFENCE-NEXT: lfence
|
||||
; X64-LFENCE-NEXT: addq $8, %rsp
|
||||
|
@ -12,14 +12,17 @@ define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: incq %rsi
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: jne .LBB0_2
|
||||
; CHECK-NEXT: jmp .LBB0_5
|
||||
; CHECK-NEXT: je .LBB0_5
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_4: # %inner_loop_latch
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: addq $2, %rsi
|
||||
; CHECK-NEXT: .LBB0_2: # %inner_loop_top
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: # =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: # Child Loop BB0_4 Depth 2
|
||||
; CHECK-NEXT: cmpb $0, (%rsi)
|
||||
; CHECK-NEXT: js .LBB0_3
|
||||
; CHECK-NEXT: .LBB0_4: # %inner_loop_latch
|
||||
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
|
||||
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: addq $2, %rsi
|
||||
; CHECK-NEXT: cmpb $0, (%rsi)
|
||||
; CHECK-NEXT: jns .LBB0_4
|
||||
; CHECK-NEXT: jmp .LBB0_3
|
||||
@ -130,58 +133,58 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
|
||||
; CHECK-NEXT: testl %ebp, %ebp
|
||||
; CHECK-NEXT: je .LBB1_18
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB1_8: # %shared_loop_header
|
||||
; CHECK-NEXT: .LBB1_9: # %shared_loop_header
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testq %rbx, %rbx
|
||||
; CHECK-NEXT: jne .LBB1_27
|
||||
; CHECK-NEXT: # %bb.9: # %inner_loop_body
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: # %bb.10: # %inner_loop_body
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK-NEXT: jns .LBB1_8
|
||||
; CHECK-NEXT: # %bb.10: # %if.end96.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: jns .LBB1_9
|
||||
; CHECK-NEXT: # %bb.11: # %if.end96.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: cmpl $3, %ebp
|
||||
; CHECK-NEXT: jae .LBB1_22
|
||||
; CHECK-NEXT: # %bb.11: # %if.end287.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: # %bb.12: # %if.end287.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: xorl %esi, %esi
|
||||
; CHECK-NEXT: cmpl $1, %ebp
|
||||
; CHECK-NEXT: setne %dl
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jne .LBB1_15
|
||||
; CHECK-NEXT: # %bb.12: # %if.end308.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: jne .LBB1_16
|
||||
; CHECK-NEXT: # %bb.13: # %if.end308.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je .LBB1_17
|
||||
; CHECK-NEXT: # %bb.13: # %if.end335.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: je .LBB1_7
|
||||
; CHECK-NEXT: # %bb.14: # %if.end335.i
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: xorl %edx, %edx
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: movl $0, %esi
|
||||
; CHECK-NEXT: jne .LBB1_7
|
||||
; CHECK-NEXT: # %bb.14: # %merge_other
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: jne .LBB1_8
|
||||
; CHECK-NEXT: # %bb.15: # %merge_other
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: xorl %esi, %esi
|
||||
; CHECK-NEXT: jmp .LBB1_16
|
||||
; CHECK-NEXT: .LBB1_15: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: jmp .LBB1_17
|
||||
; CHECK-NEXT: .LBB1_16: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: movb %dl, %sil
|
||||
; CHECK-NEXT: addl $3, %esi
|
||||
; CHECK-NEXT: .LBB1_16: # %outer_loop_latch
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: .LBB1_17: # %outer_loop_latch
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: # implicit-def: $dl
|
||||
; CHECK-NEXT: jmp .LBB1_7
|
||||
; CHECK-NEXT: .LBB1_17: # %merge_predecessor_split
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: jmp .LBB1_8
|
||||
; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: movb $32, %dl
|
||||
; CHECK-NEXT: xorl %esi, %esi
|
||||
; CHECK-NEXT: .LBB1_7: # %outer_loop_latch
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
|
||||
; CHECK-NEXT: .LBB1_8: # %outer_loop_latch
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1
|
||||
; CHECK-NEXT: movzwl %si, %esi
|
||||
; CHECK-NEXT: decl %esi
|
||||
; CHECK-NEXT: movzwl %si, %esi
|
||||
; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx
|
||||
; CHECK-NEXT: testl %ebp, %ebp
|
||||
; CHECK-NEXT: jne .LBB1_8
|
||||
; CHECK-NEXT: jne .LBB1_9
|
||||
; CHECK-NEXT: .LBB1_18: # %while.cond.us1412.i
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
|
@ -10,35 +10,30 @@
|
||||
define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 {
|
||||
; CHECK-LABEL: repeated_tail_dup:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_1: # %for.cond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: je .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %land.lhs.true
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl $10, (%rdx)
|
||||
; CHECK-NEXT: jmp .LBB0_6
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %land.lhs.true
|
||||
; CHECK-NEXT: movl $10, (%rdx)
|
||||
; CHECK-NEXT: .LBB0_6: # %dup2
|
||||
; CHECK-NEXT: movl $2, (%rcx)
|
||||
; CHECK-NEXT: testl %r9d, %r9d
|
||||
; CHECK-NEXT: jne .LBB0_8
|
||||
; CHECK-NEXT: .LBB0_1: # %for.cond
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: jne .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_3: # %if.end56
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: testb $1, %sil
|
||||
; CHECK-NEXT: je .LBB0_5
|
||||
; CHECK-NEXT: # %bb.4: # %if.then64
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movb $1, (%r8)
|
||||
; CHECK-NEXT: testl %r9d, %r9d
|
||||
; CHECK-NEXT: je .LBB0_1
|
||||
; CHECK-NEXT: jmp .LBB0_8
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_5: # %if.end70
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl $12, (%rdx)
|
||||
; CHECK-NEXT: .LBB0_6: # %dup2
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl $2, (%rcx)
|
||||
; CHECK-NEXT: testl %r9d, %r9d
|
||||
; CHECK-NEXT: je .LBB0_1
|
||||
; CHECK-NEXT: jmp .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_8: # %for.end
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
@ -115,8 +115,17 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
|
||||
; SSE-NEXT: jne .LBB0_4
|
||||
; SSE-NEXT: # %bb.5: # %middle.block
|
||||
; SSE-NEXT: cmpq %rax, %rdx
|
||||
; SSE-NEXT: je .LBB0_9
|
||||
; SSE-NEXT: jne .LBB0_6
|
||||
; SSE-NEXT: .LBB0_9: # %for.cond.cleanup
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: .p2align 4, 0x90
|
||||
; SSE-NEXT: .LBB0_8: # %for.body
|
||||
; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; SSE-NEXT: shll %cl, (%rdi,%rdx,4)
|
||||
; SSE-NEXT: incq %rdx
|
||||
; SSE-NEXT: cmpq %rdx, %rax
|
||||
; SSE-NEXT: je .LBB0_9
|
||||
; SSE-NEXT: .LBB0_6: # %for.body
|
||||
; SSE-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SSE-NEXT: cmpb $0, (%rsi,%rdx)
|
||||
@ -125,15 +134,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
|
||||
; SSE-NEXT: # %bb.7: # %for.body
|
||||
; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; SSE-NEXT: movl %r8d, %ecx
|
||||
; SSE-NEXT: .LBB0_8: # %for.body
|
||||
; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; SSE-NEXT: shll %cl, (%rdi,%rdx,4)
|
||||
; SSE-NEXT: incq %rdx
|
||||
; SSE-NEXT: cmpq %rdx, %rax
|
||||
; SSE-NEXT: jne .LBB0_6
|
||||
; SSE-NEXT: .LBB0_9: # %for.cond.cleanup
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: jmp .LBB0_8
|
||||
;
|
||||
; AVX1-LABEL: vector_variable_shift_left_loop:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
@ -241,8 +242,19 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
|
||||
; AVX1-NEXT: jne .LBB0_4
|
||||
; AVX1-NEXT: # %bb.5: # %middle.block
|
||||
; AVX1-NEXT: cmpq %rax, %rdx
|
||||
; AVX1-NEXT: je .LBB0_9
|
||||
; AVX1-NEXT: jne .LBB0_6
|
||||
; AVX1-NEXT: .LBB0_9: # %for.cond.cleanup
|
||||
; AVX1-NEXT: addq $24, %rsp
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1-NEXT: .p2align 4, 0x90
|
||||
; AVX1-NEXT: .LBB0_8: # %for.body
|
||||
; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; AVX1-NEXT: shll %cl, (%rdi,%rdx,4)
|
||||
; AVX1-NEXT: incq %rdx
|
||||
; AVX1-NEXT: cmpq %rdx, %rax
|
||||
; AVX1-NEXT: je .LBB0_9
|
||||
; AVX1-NEXT: .LBB0_6: # %for.body
|
||||
; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX1-NEXT: cmpb $0, (%rsi,%rdx)
|
||||
@ -251,17 +263,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
|
||||
; AVX1-NEXT: # %bb.7: # %for.body
|
||||
; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; AVX1-NEXT: movl %r8d, %ecx
|
||||
; AVX1-NEXT: .LBB0_8: # %for.body
|
||||
; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; AVX1-NEXT: shll %cl, (%rdi,%rdx,4)
|
||||
; AVX1-NEXT: incq %rdx
|
||||
; AVX1-NEXT: cmpq %rdx, %rax
|
||||
; AVX1-NEXT: jne .LBB0_6
|
||||
; AVX1-NEXT: .LBB0_9: # %for.cond.cleanup
|
||||
; AVX1-NEXT: addq $24, %rsp
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1-NEXT: jmp .LBB0_8
|
||||
;
|
||||
; AVX2-LABEL: vector_variable_shift_left_loop:
|
||||
; AVX2: # %bb.0: # %entry
|
||||
@ -316,8 +318,18 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
|
||||
; AVX2-NEXT: jne .LBB0_4
|
||||
; AVX2-NEXT: # %bb.5: # %middle.block
|
||||
; AVX2-NEXT: cmpq %rax, %rdx
|
||||
; AVX2-NEXT: je .LBB0_9
|
||||
; AVX2-NEXT: jne .LBB0_6
|
||||
; AVX2-NEXT: .LBB0_9: # %for.cond.cleanup
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-NEXT: .p2align 4, 0x90
|
||||
; AVX2-NEXT: .LBB0_8: # %for.body
|
||||
; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; AVX2-NEXT: shll %cl, (%rdi,%rdx,4)
|
||||
; AVX2-NEXT: incq %rdx
|
||||
; AVX2-NEXT: cmpq %rdx, %rax
|
||||
; AVX2-NEXT: je .LBB0_9
|
||||
; AVX2-NEXT: .LBB0_6: # %for.body
|
||||
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX2-NEXT: cmpb $0, (%rsi,%rdx)
|
||||
@ -326,16 +338,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
|
||||
; AVX2-NEXT: # %bb.7: # %for.body
|
||||
; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; AVX2-NEXT: movl %r8d, %ecx
|
||||
; AVX2-NEXT: .LBB0_8: # %for.body
|
||||
; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1
|
||||
; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; AVX2-NEXT: shll %cl, (%rdi,%rdx,4)
|
||||
; AVX2-NEXT: incq %rdx
|
||||
; AVX2-NEXT: cmpq %rdx, %rax
|
||||
; AVX2-NEXT: jne .LBB0_6
|
||||
; AVX2-NEXT: .LBB0_9: # %for.cond.cleanup
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-NEXT: jmp .LBB0_8
|
||||
entry:
|
||||
%cmp12 = icmp sgt i32 %count, 0
|
||||
br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
@ -7,9 +7,13 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: pushl %eax
|
||||
; CHECK-NEXT: movl $0, (%esp)
|
||||
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %forbody
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl (%esp), %eax
|
||||
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: jge .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %forbody
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl (%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
@ -19,12 +23,8 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: pextrb $2, %xmm1, 2(%ecx,%eax,4)
|
||||
; CHECK-NEXT: pextrw $0, %xmm1, (%ecx,%eax,4)
|
||||
; CHECK-NEXT: incl (%esp)
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl (%esp), %eax
|
||||
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: jl .LBB0_2
|
||||
; CHECK-NEXT: # %bb.3: # %afterfor
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %afterfor
|
||||
; CHECK-NEXT: popl %eax
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
|
@ -10,9 +10,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
|
||||
; CHECK-NEXT: movl $0, (%esp)
|
||||
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <4,4,4,4,4,4,4,4,u,u,u,u,u,u,u,u>
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %forbody
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl (%esp), %eax
|
||||
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: jge .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %forbody
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl (%esp), %eax
|
||||
; CHECK-NEXT: leal (,%eax,8), %ecx
|
||||
@ -26,12 +30,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
|
||||
; CHECK-NEXT: pand %xmm1, %xmm2
|
||||
; CHECK-NEXT: movq %xmm2, (%edx,%eax,8)
|
||||
; CHECK-NEXT: incl (%esp)
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl (%esp), %eax
|
||||
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: jl .LBB0_2
|
||||
; CHECK-NEXT: # %bb.3: # %afterfor
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %afterfor
|
||||
; CHECK-NEXT: addl $12, %esp
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
|
@ -17,9 +17,13 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $65537, {{[0-9]+}}(%esp) # imm = 0x10001
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %forbody
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmpl 16(%ebp), %eax
|
||||
; CHECK-NEXT: jge .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %forbody
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl 12(%ebp), %edx
|
||||
@ -30,12 +34,8 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: pextrw $2, %xmm1, 4(%ecx,%eax,8)
|
||||
; CHECK-NEXT: movd %xmm1, (%ecx,%eax,8)
|
||||
; CHECK-NEXT: incl {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmpl 16(%ebp), %eax
|
||||
; CHECK-NEXT: jl .LBB0_2
|
||||
; CHECK-NEXT: # %bb.3: # %afterfor
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %afterfor
|
||||
; CHECK-NEXT: movl %ebp, %esp
|
||||
; CHECK-NEXT: popl %ebp
|
||||
; CHECK-NEXT: retl
|
||||
|
@ -16,9 +16,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
|
||||
; SSE2-NEXT: movl $0, -{{[0-9]+}}(%rsp)
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u>
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <2,4,2,2,2,u,u,u>
|
||||
; SSE2-NEXT: jmp .LBB0_1
|
||||
; SSE2-NEXT: .p2align 4, 0x90
|
||||
; SSE2-NEXT: .LBB0_2: # %forbody
|
||||
; SSE2-NEXT: .LBB0_1: # %forcond
|
||||
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE2-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE2-NEXT: jge .LBB0_3
|
||||
; SSE2-NEXT: # %bb.2: # %forbody
|
||||
; SSE2-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; SSE2-NEXT: movslq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
|
||||
@ -31,12 +35,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
|
||||
; SSE2-NEXT: pextrw $4, %xmm2, %edx
|
||||
; SSE2-NEXT: movw %dx, 8(%rcx,%rax)
|
||||
; SSE2-NEXT: incl -{{[0-9]+}}(%rsp)
|
||||
; SSE2-NEXT: .LBB0_1: # %forcond
|
||||
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE2-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE2-NEXT: jl .LBB0_2
|
||||
; SSE2-NEXT: # %bb.3: # %afterfor
|
||||
; SSE2-NEXT: jmp .LBB0_1
|
||||
; SSE2-NEXT: .LBB0_3: # %afterfor
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: update:
|
||||
@ -49,9 +49,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
|
||||
; SSE41-NEXT: movw $0, -{{[0-9]+}}(%rsp)
|
||||
; SSE41-NEXT: movl $0, -{{[0-9]+}}(%rsp)
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u>
|
||||
; SSE41-NEXT: jmp .LBB0_1
|
||||
; SSE41-NEXT: .p2align 4, 0x90
|
||||
; SSE41-NEXT: .LBB0_2: # %forbody
|
||||
; SSE41-NEXT: .LBB0_1: # %forcond
|
||||
; SSE41-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SSE41-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE41-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE41-NEXT: jge .LBB0_3
|
||||
; SSE41-NEXT: # %bb.2: # %forbody
|
||||
; SSE41-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; SSE41-NEXT: movslq -{{[0-9]+}}(%rsp), %rax
|
||||
; SSE41-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
|
||||
@ -66,12 +70,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
|
||||
; SSE41-NEXT: pextrw $4, %xmm1, 8(%rcx,%rax)
|
||||
; SSE41-NEXT: movq %xmm2, (%rcx,%rax)
|
||||
; SSE41-NEXT: incl -{{[0-9]+}}(%rsp)
|
||||
; SSE41-NEXT: .LBB0_1: # %forcond
|
||||
; SSE41-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SSE41-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE41-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
|
||||
; SSE41-NEXT: jl .LBB0_2
|
||||
; SSE41-NEXT: # %bb.3: # %afterfor
|
||||
; SSE41-NEXT: jmp .LBB0_1
|
||||
; SSE41-NEXT: .LBB0_3: # %afterfor
|
||||
; SSE41-NEXT: retq
|
||||
entry:
|
||||
%dst.addr = alloca <5 x i16>*
|
||||
|
@ -14,9 +14,13 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: movl $1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = <3,3,3,u>
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %forbody
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; CHECK-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
|
||||
; CHECK-NEXT: jge .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %forbody
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rax
|
||||
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
|
||||
@ -28,12 +32,8 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: pextrd $2, %xmm1, 8(%rcx,%rax)
|
||||
; CHECK-NEXT: movq %xmm1, (%rcx,%rax)
|
||||
; CHECK-NEXT: incl -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; CHECK-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
|
||||
; CHECK-NEXT: jl .LBB0_2
|
||||
; CHECK-NEXT: # %bb.3: # %afterfor
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %afterfor
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%dst.addr = alloca <3 x i32>*
|
||||
|
@ -15,9 +15,13 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: movl $1065353216, {{[0-9]+}}(%esp) # imm = 0x3F800000
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1.97604004E+3,1.97604004E+3,1.97604004E+3,u>
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_2: # %forbody
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmpl 16(%ebp), %eax
|
||||
; CHECK-NEXT: jge .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %forbody
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl 8(%ebp), %ecx
|
||||
@ -30,12 +34,8 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind {
|
||||
; CHECK-NEXT: extractps $1, %xmm1, 4(%ecx,%eax)
|
||||
; CHECK-NEXT: movss %xmm1, (%ecx,%eax)
|
||||
; CHECK-NEXT: incl {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: .LBB0_1: # %forcond
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmpl 16(%ebp), %eax
|
||||
; CHECK-NEXT: jl .LBB0_2
|
||||
; CHECK-NEXT: # %bb.3: # %afterfor
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %afterfor
|
||||
; CHECK-NEXT: movl %ebp, %esp
|
||||
; CHECK-NEXT: popl %ebp
|
||||
; CHECK-NEXT: retl
|
||||
|
@ -11,9 +11,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
|
||||
; WIDE-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; WIDE-NEXT: movdqa {{.*#+}} xmm1 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
|
||||
; WIDE-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; WIDE-NEXT: jmp .LBB0_1
|
||||
; WIDE-NEXT: .p2align 4, 0x90
|
||||
; WIDE-NEXT: .LBB0_2: # %forbody
|
||||
; WIDE-NEXT: .LBB0_1: # %forcond
|
||||
; WIDE-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; WIDE-NEXT: movl (%esp), %eax
|
||||
; WIDE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; WIDE-NEXT: jge .LBB0_3
|
||||
; WIDE-NEXT: # %bb.2: # %forbody
|
||||
; WIDE-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; WIDE-NEXT: movl (%esp), %eax
|
||||
; WIDE-NEXT: leal (,%eax,8), %ecx
|
||||
@ -30,12 +34,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
|
||||
; WIDE-NEXT: psubb %xmm2, %xmm3
|
||||
; WIDE-NEXT: movq %xmm3, (%edx,%eax,8)
|
||||
; WIDE-NEXT: incl (%esp)
|
||||
; WIDE-NEXT: .LBB0_1: # %forcond
|
||||
; WIDE-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; WIDE-NEXT: movl (%esp), %eax
|
||||
; WIDE-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; WIDE-NEXT: jl .LBB0_2
|
||||
; WIDE-NEXT: # %bb.3: # %afterfor
|
||||
; WIDE-NEXT: jmp .LBB0_1
|
||||
; WIDE-NEXT: .LBB0_3: # %afterfor
|
||||
; WIDE-NEXT: addl $12, %esp
|
||||
; WIDE-NEXT: retl
|
||||
entry:
|
||||
|
@ -21,18 +21,18 @@
|
||||
; CHECK-LABEL: # %bb.{{.*}}:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- 0
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]]
|
||||
; CHECK: jmp .LBB0_1
|
||||
; CHECK: .LBB0_2:
|
||||
; CHECK: .LBB0_1:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: je .LBB0_4
|
||||
; CHECK: # %bb.{{.*}}:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: jne .LBB0_1
|
||||
; CHECK: # %bb.{{.*}}:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: incl %[[REG]]
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: .LBB0_1:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: jne .LBB0_2
|
||||
; CHECK: # %bb.{{.*}}:
|
||||
; CHECK: jmp .LBB0_1
|
||||
; CHECK: .LBB0_4:
|
||||
; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]]
|
||||
; CHECK: retq
|
||||
|
||||
|
@ -24,6 +24,12 @@
|
||||
; with the Orders insertion point vector.
|
||||
|
||||
; CHECK-LABEL: f: # @f
|
||||
; CHECK: .LBB0_4:
|
||||
; Check that this DEBUG_VALUE comes before the left shift.
|
||||
; CHECK: #DEBUG_VALUE: bit_offset <- $ecx
|
||||
; CHECK: .cv_loc 0 1 8 28 # t.c:8:28
|
||||
; CHECK: movl $1, %[[reg:[^ ]*]]
|
||||
; CHECK: shll %cl, %[[reg]]
|
||||
; CHECK: .LBB0_2: # %while.body
|
||||
; CHECK: movl $32, %ecx
|
||||
; CHECK: testl {{.*}}
|
||||
@ -31,12 +37,7 @@
|
||||
; CHECK: # %bb.3: # %if.then
|
||||
; CHECK: callq if_then
|
||||
; CHECK: movl %eax, %ecx
|
||||
; CHECK: .LBB0_4: # %if.end
|
||||
; Check that this DEBUG_VALUE comes before the left shift.
|
||||
; CHECK: #DEBUG_VALUE: bit_offset <- $ecx
|
||||
; CHECK: .cv_loc 0 1 8 28 # t.c:8:28
|
||||
; CHECK: movl $1, %[[reg:[^ ]*]]
|
||||
; CHECK: shll %cl, %[[reg]]
|
||||
; CHECK: jmp .LBB0_4
|
||||
|
||||
; ModuleID = 't.c'
|
||||
source_filename = "t.c"
|
||||
|
Loading…
Reference in New Issue
Block a user