mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 19:12:56 +02:00
[MachineSink] sink more profitable loads
Reviewed By: qcolombet Differential Revision: https://reviews.llvm.org/D86864
This commit is contained in:
parent
5f9db1f559
commit
39e1336a34
@ -127,6 +127,12 @@ namespace {
|
||||
/// current block.
|
||||
DenseSet<DebugVariable> SeenDbgVars;
|
||||
|
||||
/// Cache of hasStoreBetween answers that hold for any queried instruction,
/// keyed by the (From, To) block pair: true when a call or an ordered memory
/// operation was found between the blocks, false when no store was found.
std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
|
||||
HasStoreCache;
|
||||
/// Store instructions hasStoreBetween recorded for a (From, To) block pair.
/// Consulted only when HasStoreCache has no entry for the pair; each recorded
/// store is then alias-checked against the instruction being queried.
std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
|
||||
std::vector<MachineInstr *>>
|
||||
StoreInstrCache;
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification
|
||||
|
||||
@ -159,6 +165,9 @@ namespace {
|
||||
MachineBasicBlock *From,
|
||||
MachineBasicBlock *To);
|
||||
|
||||
/// Return true if a store that may alias \p MI can lie between blocks
/// \p From and \p To. Also returns true conservatively when the two blocks
/// are not in a straight line or a call / ordered memory operation is found
/// between them.
bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
|
||||
MachineInstr &MI);
|
||||
|
||||
/// Postpone the splitting of the given critical
|
||||
/// edge (\p From, \p To).
|
||||
///
|
||||
@ -359,6 +368,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
|
||||
EverMadeChange = true;
|
||||
}
|
||||
|
||||
HasStoreCache.clear();
|
||||
StoreInstrCache.clear();
|
||||
|
||||
// Now clear any kill flags for recorded registers.
|
||||
for (auto I : RegsToClearKillFlags)
|
||||
MRI->clearKillFlags(I);
|
||||
@ -919,6 +931,73 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
|
||||
}
|
||||
}
|
||||
|
||||
/// hasStoreBetween - check if there is store betweeen straight line blocks From
|
||||
/// and To.
|
||||
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
|
||||
MachineBasicBlock *To, MachineInstr &MI) {
|
||||
// Make sure From and To are in straight line which means From dominates To
|
||||
// and To post dominates From.
|
||||
if (!DT->dominates(From, To) || !PDT->dominates(To, From))
|
||||
return true;
|
||||
|
||||
auto BlockPair = std::make_pair(From, To);
|
||||
|
||||
// Does these two blocks pair be queried before and have a definite cached
|
||||
// result?
|
||||
if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
|
||||
return HasStoreCache[BlockPair];
|
||||
|
||||
if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
|
||||
return std::any_of(
|
||||
StoreInstrCache[BlockPair].begin(), StoreInstrCache[BlockPair].end(),
|
||||
[&](MachineInstr *I) { return I->mayAlias(AA, MI, false); });
|
||||
|
||||
bool SawStore = false;
|
||||
bool HasAliasedStore = false;
|
||||
DenseSet<MachineBasicBlock *> HandledBlocks;
|
||||
// Go through all reachable blocks from From.
|
||||
for (MachineBasicBlock *BB : depth_first(From)) {
|
||||
// We insert the instruction at the start of block To, so no need to worry
|
||||
// about stores inside To.
|
||||
// Store in block From should be already considered when just enter function
|
||||
// SinkInstruction.
|
||||
if (BB == To || BB == From)
|
||||
continue;
|
||||
|
||||
// We already handle this BB in previous iteration.
|
||||
if (HandledBlocks.count(BB))
|
||||
continue;
|
||||
|
||||
HandledBlocks.insert(BB);
|
||||
// To post dominates BB, it must be a path from block From.
|
||||
if (PDT->dominates(To, BB)) {
|
||||
for (MachineInstr &I : *BB) {
|
||||
// Treat as alias conservatively for a call or an ordered memory
|
||||
// operation.
|
||||
if (I.isCall() || I.hasOrderedMemoryRef()) {
|
||||
HasStoreCache[BlockPair] = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (I.mayStore()) {
|
||||
SawStore = true;
|
||||
// We still have chance to sink MI if all stores between are not
|
||||
// aliased to MI.
|
||||
// Cache all store instructions, so that we don't need to go through
|
||||
// all From reachable blocks for next load instruction.
|
||||
if (I.mayAlias(AA, MI, false))
|
||||
HasAliasedStore = true;
|
||||
StoreInstrCache[BlockPair].push_back(&I);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// If there is no store at all, cache the result.
|
||||
if (!SawStore)
|
||||
HasStoreCache[BlockPair] = false;
|
||||
return HasAliasedStore;
|
||||
}
|
||||
|
||||
/// SinkInstruction - Determine whether it is safe to sink the specified machine
|
||||
/// instruction out of its current block into a successor.
|
||||
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
|
||||
@ -979,8 +1058,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
|
||||
// We cannot sink a load across a critical edge - there may be stores in
|
||||
// other code paths.
|
||||
bool TryBreak = false;
|
||||
bool store = true;
|
||||
if (!MI.isSafeToMove(AA, store)) {
|
||||
bool Store =
|
||||
MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
|
||||
if (!MI.isSafeToMove(AA, Store)) {
|
||||
LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
|
||||
TryBreak = true;
|
||||
}
|
||||
|
@ -40,40 +40,39 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind {
|
||||
; RV32I-LABEL: cmovcc128:
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: xori a1, a1, 123
|
||||
; RV32I-NEXT: or a2, a1, a2
|
||||
; RV32I-NEXT: mv a1, a3
|
||||
; RV32I-NEXT: beqz a2, .LBB1_2
|
||||
; RV32I-NEXT: or a1, a1, a2
|
||||
; RV32I-NEXT: mv a2, a3
|
||||
; RV32I-NEXT: beqz a1, .LBB1_2
|
||||
; RV32I-NEXT: # %bb.1: # %entry
|
||||
; RV32I-NEXT: mv a1, a4
|
||||
; RV32I-NEXT: mv a2, a4
|
||||
; RV32I-NEXT: .LBB1_2: # %entry
|
||||
; RV32I-NEXT: lw a6, 0(a1)
|
||||
; RV32I-NEXT: beqz a2, .LBB1_6
|
||||
; RV32I-NEXT: beqz a1, .LBB1_5
|
||||
; RV32I-NEXT: # %bb.3: # %entry
|
||||
; RV32I-NEXT: addi a1, a4, 4
|
||||
; RV32I-NEXT: lw a5, 0(a1)
|
||||
; RV32I-NEXT: bnez a2, .LBB1_7
|
||||
; RV32I-NEXT: addi a7, a4, 4
|
||||
; RV32I-NEXT: bnez a1, .LBB1_6
|
||||
; RV32I-NEXT: .LBB1_4:
|
||||
; RV32I-NEXT: addi a1, a3, 8
|
||||
; RV32I-NEXT: lw a1, 0(a1)
|
||||
; RV32I-NEXT: bnez a2, .LBB1_8
|
||||
; RV32I-NEXT: addi a5, a3, 8
|
||||
; RV32I-NEXT: j .LBB1_7
|
||||
; RV32I-NEXT: .LBB1_5:
|
||||
; RV32I-NEXT: addi a2, a3, 12
|
||||
; RV32I-NEXT: j .LBB1_9
|
||||
; RV32I-NEXT: .LBB1_6:
|
||||
; RV32I-NEXT: addi a1, a3, 4
|
||||
; RV32I-NEXT: lw a5, 0(a1)
|
||||
; RV32I-NEXT: beqz a2, .LBB1_4
|
||||
; RV32I-NEXT: addi a7, a3, 4
|
||||
; RV32I-NEXT: beqz a1, .LBB1_4
|
||||
; RV32I-NEXT: .LBB1_6: # %entry
|
||||
; RV32I-NEXT: addi a5, a4, 8
|
||||
; RV32I-NEXT: .LBB1_7: # %entry
|
||||
; RV32I-NEXT: addi a1, a4, 8
|
||||
; RV32I-NEXT: lw a6, 0(a2)
|
||||
; RV32I-NEXT: lw a7, 0(a7)
|
||||
; RV32I-NEXT: lw a2, 0(a5)
|
||||
; RV32I-NEXT: beqz a1, .LBB1_9
|
||||
; RV32I-NEXT: # %bb.8: # %entry
|
||||
; RV32I-NEXT: addi a1, a4, 12
|
||||
; RV32I-NEXT: j .LBB1_10
|
||||
; RV32I-NEXT: .LBB1_9:
|
||||
; RV32I-NEXT: addi a1, a3, 12
|
||||
; RV32I-NEXT: .LBB1_10: # %entry
|
||||
; RV32I-NEXT: lw a1, 0(a1)
|
||||
; RV32I-NEXT: beqz a2, .LBB1_5
|
||||
; RV32I-NEXT: .LBB1_8: # %entry
|
||||
; RV32I-NEXT: addi a2, a4, 12
|
||||
; RV32I-NEXT: .LBB1_9: # %entry
|
||||
; RV32I-NEXT: lw a2, 0(a2)
|
||||
; RV32I-NEXT: sw a2, 12(a0)
|
||||
; RV32I-NEXT: sw a1, 8(a0)
|
||||
; RV32I-NEXT: sw a5, 4(a0)
|
||||
; RV32I-NEXT: sw a1, 12(a0)
|
||||
; RV32I-NEXT: sw a2, 8(a0)
|
||||
; RV32I-NEXT: sw a7, 4(a0)
|
||||
; RV32I-NEXT: sw a6, 0(a0)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -124,40 +123,39 @@ entry:
|
||||
define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind {
|
||||
; RV32I-LABEL: cmov128:
|
||||
; RV32I: # %bb.0: # %entry
|
||||
; RV32I-NEXT: andi a4, a1, 1
|
||||
; RV32I-NEXT: mv a1, a2
|
||||
; RV32I-NEXT: bnez a4, .LBB3_2
|
||||
; RV32I-NEXT: andi a1, a1, 1
|
||||
; RV32I-NEXT: mv a4, a2
|
||||
; RV32I-NEXT: bnez a1, .LBB3_2
|
||||
; RV32I-NEXT: # %bb.1: # %entry
|
||||
; RV32I-NEXT: mv a1, a3
|
||||
; RV32I-NEXT: mv a4, a3
|
||||
; RV32I-NEXT: .LBB3_2: # %entry
|
||||
; RV32I-NEXT: lw a6, 0(a1)
|
||||
; RV32I-NEXT: bnez a4, .LBB3_6
|
||||
; RV32I-NEXT: bnez a1, .LBB3_5
|
||||
; RV32I-NEXT: # %bb.3: # %entry
|
||||
; RV32I-NEXT: addi a1, a3, 4
|
||||
; RV32I-NEXT: lw a5, 0(a1)
|
||||
; RV32I-NEXT: beqz a4, .LBB3_7
|
||||
; RV32I-NEXT: addi a7, a3, 4
|
||||
; RV32I-NEXT: beqz a1, .LBB3_6
|
||||
; RV32I-NEXT: .LBB3_4:
|
||||
; RV32I-NEXT: addi a1, a2, 8
|
||||
; RV32I-NEXT: lw a1, 0(a1)
|
||||
; RV32I-NEXT: beqz a4, .LBB3_8
|
||||
; RV32I-NEXT: addi a5, a2, 8
|
||||
; RV32I-NEXT: j .LBB3_7
|
||||
; RV32I-NEXT: .LBB3_5:
|
||||
; RV32I-NEXT: addi a2, a2, 12
|
||||
; RV32I-NEXT: j .LBB3_9
|
||||
; RV32I-NEXT: .LBB3_6:
|
||||
; RV32I-NEXT: addi a1, a2, 4
|
||||
; RV32I-NEXT: lw a5, 0(a1)
|
||||
; RV32I-NEXT: bnez a4, .LBB3_4
|
||||
; RV32I-NEXT: addi a7, a2, 4
|
||||
; RV32I-NEXT: bnez a1, .LBB3_4
|
||||
; RV32I-NEXT: .LBB3_6: # %entry
|
||||
; RV32I-NEXT: addi a5, a3, 8
|
||||
; RV32I-NEXT: .LBB3_7: # %entry
|
||||
; RV32I-NEXT: addi a1, a3, 8
|
||||
; RV32I-NEXT: lw a6, 0(a4)
|
||||
; RV32I-NEXT: lw a7, 0(a7)
|
||||
; RV32I-NEXT: lw a4, 0(a5)
|
||||
; RV32I-NEXT: bnez a1, .LBB3_9
|
||||
; RV32I-NEXT: # %bb.8: # %entry
|
||||
; RV32I-NEXT: addi a1, a3, 12
|
||||
; RV32I-NEXT: j .LBB3_10
|
||||
; RV32I-NEXT: .LBB3_9:
|
||||
; RV32I-NEXT: addi a1, a2, 12
|
||||
; RV32I-NEXT: .LBB3_10: # %entry
|
||||
; RV32I-NEXT: lw a1, 0(a1)
|
||||
; RV32I-NEXT: bnez a4, .LBB3_5
|
||||
; RV32I-NEXT: .LBB3_8: # %entry
|
||||
; RV32I-NEXT: addi a2, a3, 12
|
||||
; RV32I-NEXT: .LBB3_9: # %entry
|
||||
; RV32I-NEXT: lw a2, 0(a2)
|
||||
; RV32I-NEXT: sw a2, 12(a0)
|
||||
; RV32I-NEXT: sw a1, 8(a0)
|
||||
; RV32I-NEXT: sw a5, 4(a0)
|
||||
; RV32I-NEXT: sw a1, 12(a0)
|
||||
; RV32I-NEXT: sw a4, 8(a0)
|
||||
; RV32I-NEXT: sw a7, 4(a0)
|
||||
; RV32I-NEXT: sw a6, 0(a0)
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
|
@ -14,40 +14,40 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
; CHECK-NEXT: movq %rsp, %rbp
|
||||
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
; CHECK-NEXT: movslq (%rdi), %rax
|
||||
; CHECK-NEXT: movslq (%rdi), %rdi
|
||||
; CHECK-NEXT: movslq (%rsi), %r8
|
||||
; CHECK-NEXT: movslq (%rdx), %r10
|
||||
; CHECK-NEXT: movl (%rcx), %edi
|
||||
; CHECK-NEXT: movslq (%r9), %rcx
|
||||
; CHECK-NEXT: movq %rsp, %rdx
|
||||
; CHECK-NEXT: subl %eax, %r8d
|
||||
; CHECK-NEXT: movslq %r8d, %rsi
|
||||
; CHECK-NEXT: movl (%rcx), %esi
|
||||
; CHECK-NEXT: movq %rsp, %rcx
|
||||
; CHECK-NEXT: subl %edi, %r8d
|
||||
; CHECK-NEXT: movslq %r8d, %rdx
|
||||
; CHECK-NEXT: js .LBB0_1
|
||||
; CHECK-NEXT: # %bb.11: # %b63
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: testq %rdx, %rdx
|
||||
; CHECK-NEXT: js .LBB0_14
|
||||
; CHECK-NEXT: # %bb.12:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_13: # %a25b
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: testb %dil, %dil
|
||||
; CHECK-NEXT: je .LBB0_13
|
||||
; CHECK-NEXT: .LBB0_14: # %b85
|
||||
; CHECK-NEXT: movb $1, %al
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jne .LBB0_1
|
||||
; CHECK-NEXT: # %bb.15:
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_16: # %a25b140
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: testb %dil, %dil
|
||||
; CHECK-NEXT: je .LBB0_16
|
||||
; CHECK-NEXT: .LBB0_1: # %a29b
|
||||
; CHECK-NEXT: cmpl %r10d, %edi
|
||||
; CHECK-NEXT: cmpl %r10d, %esi
|
||||
; CHECK-NEXT: js .LBB0_10
|
||||
; CHECK-NEXT: # %bb.2: # %b158
|
||||
; CHECK-NEXT: movslq (%r9), %rsi
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: movb $1, %r10b
|
||||
@ -77,7 +77,7 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: js .LBB0_4
|
||||
; CHECK-NEXT: # %bb.17: # %b179
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: testq %rdx, %rdx
|
||||
; CHECK-NEXT: js .LBB0_18
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_37: # %a30b
|
||||
@ -97,7 +97,7 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: je .LBB0_19
|
||||
; CHECK-NEXT: .LBB0_4: # %a33b
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: orl %r8d, %eax
|
||||
; CHECK-NEXT: movl %eax, %r9d
|
||||
; CHECK-NEXT: shrl $31, %r9d
|
||||
@ -106,7 +106,7 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: .LBB0_5: # %a50b
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
|
||||
; CHECK-NEXT: movl %r8d, %eax
|
||||
; CHECK-NEXT: orl %ecx, %eax
|
||||
; CHECK-NEXT: orl %esi, %eax
|
||||
; CHECK-NEXT: movl %eax, %r11d
|
||||
; CHECK-NEXT: shrl $31, %r11d
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
@ -156,7 +156,7 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # => This Loop Header: Depth=2
|
||||
; CHECK-NEXT: # Child Loop BB0_21 Depth 3
|
||||
; CHECK-NEXT: testq %rcx, %rcx
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: js .LBB0_22
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_21: # %a35b
|
||||
@ -169,14 +169,14 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_28: # %b1016
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_26 Depth=2
|
||||
; CHECK-NEXT: testq %rcx, %rcx
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: jle .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_26: # %b858
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # => This Loop Header: Depth=2
|
||||
; CHECK-NEXT: # Child Loop BB0_38 Depth 3
|
||||
; CHECK-NEXT: # Child Loop BB0_29 Depth 3
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: testq %rdx, %rdx
|
||||
; CHECK-NEXT: js .LBB0_27
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_38: # %a53b
|
||||
@ -194,38 +194,38 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # Parent Loop BB0_26 Depth=2
|
||||
; CHECK-NEXT: # => This Inner Loop Header: Depth=3
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: testq %rdx, %rdx
|
||||
; CHECK-NEXT: jle .LBB0_29
|
||||
; CHECK-NEXT: jmp .LBB0_28
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_32: # %b1263
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_30 Depth=2
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: testq %rdx, %rdx
|
||||
; CHECK-NEXT: jle .LBB0_7
|
||||
; CHECK-NEXT: .LBB0_30: # %b1117
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # => This Loop Header: Depth=2
|
||||
; CHECK-NEXT: # Child Loop BB0_39 Depth 3
|
||||
; CHECK-NEXT: # Child Loop BB0_33 Depth 3
|
||||
; CHECK-NEXT: testq %rcx, %rcx
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: js .LBB0_31
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_39: # %a63b
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # Parent Loop BB0_30 Depth=2
|
||||
; CHECK-NEXT: # => This Inner Loop Header: Depth=3
|
||||
; CHECK-NEXT: testq %rcx, %rcx
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: jle .LBB0_39
|
||||
; CHECK-NEXT: .LBB0_31: # %b1139
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_30 Depth=2
|
||||
; CHECK-NEXT: testq %rcx, %rcx
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: jle .LBB0_32
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_33: # %a63b1266
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # Parent Loop BB0_30 Depth=2
|
||||
; CHECK-NEXT: # => This Inner Loop Header: Depth=3
|
||||
; CHECK-NEXT: testq %rcx, %rcx
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: jle .LBB0_33
|
||||
; CHECK-NEXT: jmp .LBB0_32
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
@ -237,7 +237,7 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
|
||||
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
|
||||
; CHECK-NEXT: # => This Loop Header: Depth=2
|
||||
; CHECK-NEXT: # Child Loop BB0_24 Depth 3
|
||||
; CHECK-NEXT: testq %rsi, %rsi
|
||||
; CHECK-NEXT: testq %rdx, %rdx
|
||||
; CHECK-NEXT: js .LBB0_25
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_24: # %a45b
|
||||
|
@ -16,31 +16,30 @@ define void @foo(i8* nocapture %_stubArgs) nounwind {
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: subq $152, %rsp
|
||||
; CHECK-NEXT: movq 48(%rdi), %rax
|
||||
; CHECK-NEXT: movl 64(%rdi), %edx
|
||||
; CHECK-NEXT: movl 64(%rdi), %ecx
|
||||
; CHECK-NEXT: movl $200, %esi
|
||||
; CHECK-NEXT: addl 68(%rdi), %esi
|
||||
; CHECK-NEXT: imull $46, %edx, %ecx
|
||||
; CHECK-NEXT: addq %rsi, %rcx
|
||||
; CHECK-NEXT: shlq $4, %rcx
|
||||
; CHECK-NEXT: imull $47, %edx, %edx
|
||||
; CHECK-NEXT: imull $46, %ecx, %edx
|
||||
; CHECK-NEXT: addq %rsi, %rdx
|
||||
; CHECK-NEXT: shlq $4, %rdx
|
||||
; CHECK-NEXT: movaps (%rax,%rdx), %xmm0
|
||||
; CHECK-NEXT: imull $47, %ecx, %ecx
|
||||
; CHECK-NEXT: addq %rsi, %rcx
|
||||
; CHECK-NEXT: shlq $4, %rcx
|
||||
; CHECK-NEXT: cmpl $0, (%rdi)
|
||||
; CHECK-NEXT: jne .LBB0_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: je .LBB0_4
|
||||
; CHECK-NEXT: jmp .LBB0_5
|
||||
; CHECK-NEXT: .LBB0_1:
|
||||
; CHECK-NEXT: movaps (%rax,%rcx), %xmm1
|
||||
; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: jne .LBB0_5
|
||||
; CHECK-NEXT: .LBB0_4: # %entry
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: .LBB0_5: # %entry
|
||||
; CHECK-NEXT: jmp .LBB0_3
|
||||
; CHECK-NEXT: .LBB0_1:
|
||||
; CHECK-NEXT: movaps (%rax,%rdx), %xmm0
|
||||
; CHECK-NEXT: .LBB0_3: # %entry
|
||||
; CHECK-NEXT: movaps (%rax,%rcx), %xmm1
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: jne .LBB0_5
|
||||
; CHECK-NEXT: # %bb.4: # %entry
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: .LBB0_5: # %entry
|
||||
; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: addq $152, %rsp
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
@ -358,44 +358,57 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
|
||||
;
|
||||
; NOGATHER-LABEL: masked_gather_v8i32:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpmovmskb %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rcx
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rcx), %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_2: # %else
|
||||
; NOGATHER-NEXT: testb $2, %al
|
||||
; NOGATHER-NEXT: je .LBB6_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rcx
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rcx), %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_4: # %else2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm0
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: testb $4, %al
|
||||
; NOGATHER-NEXT: jne .LBB6_5
|
||||
; NOGATHER-NEXT: # %bb.6: # %else5
|
||||
; NOGATHER-NEXT: je .LBB6_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rcx), %xmm1, %xmm2
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_6: # %else5
|
||||
; NOGATHER-NEXT: testb $8, %al
|
||||
; NOGATHER-NEXT: jne .LBB6_7
|
||||
; NOGATHER-NEXT: je .LBB6_8
|
||||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rcx), %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_8: # %else8
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm0
|
||||
; NOGATHER-NEXT: testb $16, %al
|
||||
; NOGATHER-NEXT: jne .LBB6_9
|
||||
; NOGATHER-NEXT: je .LBB6_10
|
||||
; NOGATHER-NEXT: # %bb.9: # %cond.load10
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rcx), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB6_10: # %else11
|
||||
; NOGATHER-NEXT: testb $32, %al
|
||||
; NOGATHER-NEXT: je .LBB6_12
|
||||
; NOGATHER-NEXT: .LBB6_11: # %cond.load13
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: # %bb.11: # %cond.load13
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rcx), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB6_12: # %else14
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; NOGATHER-NEXT: testb $64, %al
|
||||
; NOGATHER-NEXT: jne .LBB6_13
|
||||
; NOGATHER-NEXT: # %bb.14: # %else17
|
||||
@ -404,26 +417,6 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
|
||||
; NOGATHER-NEXT: .LBB6_16: # %else20
|
||||
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: retq
|
||||
; NOGATHER-NEXT: .LBB6_5: # %cond.load4
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rcx), %xmm1, %xmm3
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: testb $8, %al
|
||||
; NOGATHER-NEXT: je .LBB6_8
|
||||
; NOGATHER-NEXT: .LBB6_7: # %cond.load7
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rcx), %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: testb $16, %al
|
||||
; NOGATHER-NEXT: je .LBB6_10
|
||||
; NOGATHER-NEXT: .LBB6_9: # %cond.load10
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rcx), %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: testb $32, %al
|
||||
; NOGATHER-NEXT: jne .LBB6_11
|
||||
; NOGATHER-NEXT: jmp .LBB6_12
|
||||
; NOGATHER-NEXT: .LBB6_13: # %cond.load16
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
@ -472,44 +465,58 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
|
||||
;
|
||||
; NOGATHER-LABEL: masked_gather_v8float:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpmovmskb %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rcx
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_2: # %else
|
||||
; NOGATHER-NEXT: testb $2, %al
|
||||
; NOGATHER-NEXT: je .LBB7_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rcx
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_4: # %else2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm0
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: testb $4, %al
|
||||
; NOGATHER-NEXT: jne .LBB7_5
|
||||
; NOGATHER-NEXT: # %bb.6: # %else5
|
||||
; NOGATHER-NEXT: je .LBB7_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm1[0,1],mem[0],xmm1[3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_6: # %else5
|
||||
; NOGATHER-NEXT: testb $8, %al
|
||||
; NOGATHER-NEXT: jne .LBB7_7
|
||||
; NOGATHER-NEXT: je .LBB7_8
|
||||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_8: # %else8
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm0
|
||||
; NOGATHER-NEXT: testb $16, %al
|
||||
; NOGATHER-NEXT: jne .LBB7_9
|
||||
; NOGATHER-NEXT: je .LBB7_10
|
||||
; NOGATHER-NEXT: # %bb.9: # %cond.load10
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB7_10: # %else11
|
||||
; NOGATHER-NEXT: testb $32, %al
|
||||
; NOGATHER-NEXT: je .LBB7_12
|
||||
; NOGATHER-NEXT: .LBB7_11: # %cond.load13
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: # %bb.11: # %cond.load13
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB7_12: # %else14
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; NOGATHER-NEXT: testb $64, %al
|
||||
; NOGATHER-NEXT: jne .LBB7_13
|
||||
; NOGATHER-NEXT: # %bb.14: # %else17
|
||||
@ -518,27 +525,6 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
|
||||
; NOGATHER-NEXT: .LBB7_16: # %else20
|
||||
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: retq
|
||||
; NOGATHER-NEXT: .LBB7_5: # %cond.load4
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1],mem[0],xmm1[3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: testb $8, %al
|
||||
; NOGATHER-NEXT: je .LBB7_8
|
||||
; NOGATHER-NEXT: .LBB7_7: # %cond.load7
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: testb $16, %al
|
||||
; NOGATHER-NEXT: je .LBB7_10
|
||||
; NOGATHER-NEXT: .LBB7_9: # %cond.load10
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3,4,5,6,7]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: testb $32, %al
|
||||
; NOGATHER-NEXT: jne .LBB7_11
|
||||
; NOGATHER-NEXT: jmp .LBB7_12
|
||||
; NOGATHER-NEXT: .LBB7_13: # %cond.load16
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rcx
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
|
@ -165,14 +165,13 @@ define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <
|
||||
; NOCMOV-NEXT: fnstsw %ax
|
||||
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
|
||||
; NOCMOV-NEXT: sahf
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
|
||||
; NOCMOV-NEXT: jne .LBB4_3
|
||||
; NOCMOV-NEXT: # %bb.1: # %entry
|
||||
; NOCMOV-NEXT: jp .LBB4_3
|
||||
; NOCMOV-NEXT: # %bb.2: # %entry
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
|
||||
; NOCMOV-NEXT: .LBB4_3: # %entry
|
||||
; NOCMOV-NEXT: movl (%eax), %ecx
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edx
|
||||
; NOCMOV-NEXT: jne .LBB4_6
|
||||
; NOCMOV-NEXT: # %bb.4: # %entry
|
||||
@ -181,7 +180,6 @@ define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edx
|
||||
; NOCMOV-NEXT: .LBB4_6: # %entry
|
||||
; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; NOCMOV-NEXT: movl (%edx), %edx
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %esi
|
||||
; NOCMOV-NEXT: jne .LBB4_9
|
||||
; NOCMOV-NEXT: # %bb.7: # %entry
|
||||
@ -189,6 +187,8 @@ define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <
|
||||
; NOCMOV-NEXT: # %bb.8: # %entry
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %esi
|
||||
; NOCMOV-NEXT: .LBB4_9: # %entry
|
||||
; NOCMOV-NEXT: movl (%ecx), %ecx
|
||||
; NOCMOV-NEXT: movl (%edx), %edx
|
||||
; NOCMOV-NEXT: movl (%esi), %esi
|
||||
; NOCMOV-NEXT: leal {{[0-9]+}}(%esp), %edi
|
||||
; NOCMOV-NEXT: jne .LBB4_12
|
||||
|
@ -557,63 +557,59 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
|
||||
; MCU-NEXT: testb $1, %al
|
||||
; MCU-NEXT: jne .LBB7_1
|
||||
; MCU-NEXT: # %bb.2:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; MCU-NEXT: movl (%eax), %eax
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
|
||||
; MCU-NEXT: je .LBB7_5
|
||||
; MCU-NEXT: .LBB7_4:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx
|
||||
; MCU-NEXT: movl (%ecx), %ecx
|
||||
; MCU-NEXT: je .LBB7_8
|
||||
; MCU-NEXT: .LBB7_7:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi
|
||||
; MCU-NEXT: movl (%esi), %esi
|
||||
; MCU-NEXT: je .LBB7_11
|
||||
; MCU-NEXT: .LBB7_10:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
|
||||
; MCU-NEXT: movl (%edi), %edi
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
|
||||
; MCU-NEXT: je .LBB7_14
|
||||
; MCU-NEXT: .LBB7_13:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx
|
||||
; MCU-NEXT: movl (%ebx), %ebx
|
||||
; MCU-NEXT: je .LBB7_17
|
||||
; MCU-NEXT: .LBB7_16:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
|
||||
; MCU-NEXT: jmp .LBB7_18
|
||||
; MCU-NEXT: .LBB7_1:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; MCU-NEXT: movl (%eax), %eax
|
||||
; MCU-NEXT: jmp .LBB7_15
|
||||
; MCU-NEXT: .LBB7_1:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
|
||||
; MCU-NEXT: jne .LBB7_4
|
||||
; MCU-NEXT: .LBB7_5:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx
|
||||
; MCU-NEXT: movl (%ecx), %ecx
|
||||
; MCU-NEXT: jne .LBB7_7
|
||||
; MCU-NEXT: .LBB7_8:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi
|
||||
; MCU-NEXT: movl (%esi), %esi
|
||||
; MCU-NEXT: jne .LBB7_10
|
||||
; MCU-NEXT: .LBB7_11:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
|
||||
; MCU-NEXT: movl (%edi), %edi
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
|
||||
; MCU-NEXT: jne .LBB7_13
|
||||
; MCU-NEXT: .LBB7_14:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx
|
||||
; MCU-NEXT: movl (%ebx), %ebx
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; MCU-NEXT: .LBB7_15:
|
||||
; MCU-NEXT: movl (%edi), %ebx
|
||||
; MCU-NEXT: movl (%ecx), %edi
|
||||
; MCU-NEXT: movl (%esi), %esi
|
||||
; MCU-NEXT: movl (%ebp), %ecx
|
||||
; MCU-NEXT: movl (%eax), %eax
|
||||
; MCU-NEXT: jne .LBB7_16
|
||||
; MCU-NEXT: .LBB7_17:
|
||||
; MCU-NEXT: # %bb.17:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
|
||||
; MCU-NEXT: jmp .LBB7_18
|
||||
; MCU-NEXT: .LBB7_16:
|
||||
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
|
||||
; MCU-NEXT: .LBB7_18:
|
||||
; MCU-NEXT: movl (%ebp), %ebp
|
||||
; MCU-NEXT: decl %ebp
|
||||
; MCU-NEXT: decl %ebx
|
||||
; MCU-NEXT: decl %edi
|
||||
; MCU-NEXT: decl %esi
|
||||
; MCU-NEXT: decl %ecx
|
||||
; MCU-NEXT: decl %eax
|
||||
; MCU-NEXT: movl %eax, 20(%edx)
|
||||
; MCU-NEXT: movl %ecx, 16(%edx)
|
||||
; MCU-NEXT: decl %ecx
|
||||
; MCU-NEXT: decl %esi
|
||||
; MCU-NEXT: decl %edi
|
||||
; MCU-NEXT: decl %ebx
|
||||
; MCU-NEXT: movl %ebx, 20(%edx)
|
||||
; MCU-NEXT: movl %edi, 16(%edx)
|
||||
; MCU-NEXT: movl %esi, 12(%edx)
|
||||
; MCU-NEXT: movl %edi, 8(%edx)
|
||||
; MCU-NEXT: movl %ebx, 4(%edx)
|
||||
; MCU-NEXT: movl %ecx, 8(%edx)
|
||||
; MCU-NEXT: movl %eax, 4(%edx)
|
||||
; MCU-NEXT: movl %ebp, (%edx)
|
||||
; MCU-NEXT: popl %esi
|
||||
; MCU-NEXT: popl %edi
|
||||
|
@ -4361,7 +4361,6 @@ define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
|
||||
define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
||||
; SSE2-LABEL: uitofp_load_4i64_to_4f32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm2
|
||||
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
@ -4377,6 +4376,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: addss %xmm1, %xmm1
|
||||
; SSE2-NEXT: .LBB83_3:
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
@ -4710,40 +4710,38 @@ define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
|
||||
define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-LABEL: uitofp_load_8i64_to_8f32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm5
|
||||
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
|
||||
; SSE2-NEXT: movdqa 32(%rdi), %xmm2
|
||||
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_1
|
||||
; SSE2-NEXT: # %bb.2:
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; SSE2-NEXT: jmp .LBB87_3
|
||||
; SSE2-NEXT: .LBB87_1:
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq %rcx
|
||||
; SSE2-NEXT: andl $1, %eax
|
||||
; SSE2-NEXT: orq %rcx, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
|
||||
; SSE2-NEXT: addss %xmm3, %xmm3
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
|
||||
; SSE2-NEXT: addss %xmm2, %xmm2
|
||||
; SSE2-NEXT: .LBB87_3:
|
||||
; SSE2-NEXT: movdqa (%rdi), %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm0, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_4
|
||||
; SSE2-NEXT: # %bb.5:
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: jmp .LBB87_6
|
||||
; SSE2-NEXT: .LBB87_4:
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq %rcx
|
||||
; SSE2-NEXT: andl $1, %eax
|
||||
; SSE2-NEXT: orq %rcx, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
|
||||
; SSE2-NEXT: addss %xmm4, %xmm4
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: addss %xmm1, %xmm1
|
||||
; SSE2-NEXT: .LBB87_6:
|
||||
; SSE2-NEXT: movq %xmm5, %rax
|
||||
; SSE2-NEXT: movq %xmm3, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_7
|
||||
; SSE2-NEXT: # %bb.8:
|
||||
@ -4759,55 +4757,59 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
|
||||
; SSE2-NEXT: addss %xmm0, %xmm0
|
||||
; SSE2-NEXT: .LBB87_9:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm5, %rax
|
||||
; SSE2-NEXT: movdqa 48(%rdi), %xmm6
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm3, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_10
|
||||
; SSE2-NEXT: # %bb.11:
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm6
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
|
||||
; SSE2-NEXT: jmp .LBB87_12
|
||||
; SSE2-NEXT: .LBB87_10:
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq %rcx
|
||||
; SSE2-NEXT: andl $1, %eax
|
||||
; SSE2-NEXT: orq %rcx, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm6
|
||||
; SSE2-NEXT: addss %xmm6, %xmm6
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
|
||||
; SSE2-NEXT: addss %xmm4, %xmm4
|
||||
; SSE2-NEXT: .LBB87_12:
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: movq %xmm6, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_13
|
||||
; SSE2-NEXT: # %bb.14:
|
||||
; SSE2-NEXT: xorps %xmm5, %xmm5
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm5
|
||||
; SSE2-NEXT: xorps %xmm3, %xmm3
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
|
||||
; SSE2-NEXT: jmp .LBB87_15
|
||||
; SSE2-NEXT: .LBB87_13:
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq %rcx
|
||||
; SSE2-NEXT: andl $1, %eax
|
||||
; SSE2-NEXT: orq %rcx, %rax
|
||||
; SSE2-NEXT: xorps %xmm5, %xmm5
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm5
|
||||
; SSE2-NEXT: addss %xmm5, %xmm5
|
||||
; SSE2-NEXT: xorps %xmm3, %xmm3
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
|
||||
; SSE2-NEXT: addss %xmm3, %xmm3
|
||||
; SSE2-NEXT: .LBB87_15:
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm1, %rax
|
||||
; SSE2-NEXT: movdqa 32(%rdi), %xmm5
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm6, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_16
|
||||
; SSE2-NEXT: # %bb.17:
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm7
|
||||
; SSE2-NEXT: xorps %xmm6, %xmm6
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm6
|
||||
; SSE2-NEXT: jmp .LBB87_18
|
||||
; SSE2-NEXT: .LBB87_16:
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq %rcx
|
||||
; SSE2-NEXT: andl $1, %eax
|
||||
; SSE2-NEXT: orq %rcx, %rax
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm7
|
||||
; SSE2-NEXT: addss %xmm7, %xmm7
|
||||
; SSE2-NEXT: xorps %xmm6, %xmm6
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm6
|
||||
; SSE2-NEXT: addss %xmm6, %xmm6
|
||||
; SSE2-NEXT: .LBB87_18:
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
|
||||
; SSE2-NEXT: movq %xmm5, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_19
|
||||
; SSE2-NEXT: # %bb.20:
|
||||
@ -4823,9 +4825,9 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
|
||||
; SSE2-NEXT: addss %xmm1, %xmm1
|
||||
; SSE2-NEXT: .LBB87_21:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,2,3]
|
||||
; SSE2-NEXT: movq %xmm2, %rax
|
||||
; SSE2-NEXT: testq %rax, %rax
|
||||
; SSE2-NEXT: js .LBB87_22
|
||||
@ -4843,7 +4845,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
||||
; SSE2-NEXT: addss %xmm2, %xmm2
|
||||
; SSE2-NEXT: .LBB87_24:
|
||||
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: uitofp_load_8i64_to_8f32:
|
||||
|
Loading…
Reference in New Issue
Block a user