1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 20:43:44 +02:00
llvm-mirror/test/CodeGen/X86/machine-cp.ll
Simon Pilgrim f4315577b8 [X86][SSE] Match zero/any extension shuffles that don't start from the first element
This patch generalizes the lowering of shuffles as zero extensions to allow extensions that don't start from the first element. It now recognises extensions starting anywhere in the lower 128-bits or at the start of any higher 128-bit lane.

The motivation was to reduce the number of high cost pshufb calls, but it also improves the SSE2 case as well.

Differential Revision: http://reviews.llvm.org/D12561

llvm-svn: 248250
2015-09-22 08:16:08 +00:00

110 lines
3.8 KiB
LLVM

; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona -verify-machineinstrs < %s | FileCheck %s
; After tail duplication, two copies in an early exit BB can be cancelled out.
; rdar://10640363
; t1(a, b): returns %a unchanged when %b == 0; otherwise repeats
; (a, b) <- (b, a srem b) until the remainder is 0 and returns the last
; divisor. The FileCheck directives below pin the code generated for the
; early-exit path only.
define i32 @t1(i32 %a, i32 %b) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: je [[LABEL:.*BB.*]]
; Early exit: skip the loop entirely when the divisor %b is zero.
%cmp1 = icmp eq i32 %b, 0
br i1 %cmp1, label %while.end, label %while.body
; The block the je above jumps to must reach ret without emitting any mov.
; CHECK: [[LABEL]]:
; CHECK-NOT: mov
; CHECK: ret
while.body: ; preds = %entry, %while.body
; Loop-carried pair: the previous divisor becomes the next dividend, and the
; previous remainder becomes the next divisor.
%a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
%b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
%rem = srem i32 %a.addr.03, %b.addr.02
; Leave the loop once the signed remainder reaches zero.
%cmp = icmp eq i32 %rem, 0
br i1 %cmp, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
; Result is %a on the early-exit edge, or the current divisor on the loop edge.
%a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
ret i32 %a.addr.0.lcssa
}
; Two movdqa copies (from phi-elimination) in the entry BB cancel out.
; rdar://10428165
; t2(T0, T1): returns a single shuffle of the two <8 x i16> inputs. Only result
; lanes 2, 3 and 4 are defined (mask indices 7, 2 and 8); every other lane is
; undef. The directive below requires that no movdqa appears in the output.
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
entry:
; CHECK-LABEL: t2:
; CHECK-NOT: movdqa
; Mask indices 0-7 select from %T0 and 8-15 from %T1, so the defined lanes are
; T0[7], T0[2] and T1[0].
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp8
}
; t3(a, b): same remainder loop as @t1 but on i64 operands, with the result
; truncated to i32 before returning. As in @t1, the directives require that the
; early-exit block reaches ret without any mov.
define i32 @t3(i64 %a, i64 %b) nounwind {
entry:
; CHECK-LABEL: t3:
; CHECK: je [[LABEL:.*BB.*]]
; Early exit: skip the loop entirely when the divisor %b is zero.
%cmp1 = icmp eq i64 %b, 0
br i1 %cmp1, label %while.end, label %while.body
; The block the je above jumps to must not contain a mov before its ret.
; CHECK: [[LABEL]]:
; CHECK-NOT: mov
; CHECK: ret
while.body: ; preds = %entry, %while.body
; Loop-carried pair: the previous divisor becomes the next dividend, and the
; previous remainder becomes the next divisor.
%a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
%b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
%rem = srem i64 %a.addr.03, %b.addr.02
; Leave the loop once the signed remainder reaches zero.
%cmp = icmp eq i64 %rem, 0
br i1 %cmp, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
; Result is %a on the early-exit edge, or the current divisor on the loop edge.
%a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
; Only the low 32 bits of the 64-bit result are returned.
%t = trunc i64 %a.addr.0.lcssa to i32
ret i32 %t
}
; Check that copy propagation does not kill things like:
; dst = copy src <-- do not kill that.
; ... = op1 dst<undef>
; ... = op2 dst <-- this is used here.
;
; CHECK-LABEL: foo:
; CHECK: psllw $7,
; CHECK: psllw $7, [[SRC1:%xmm[0-9]+]]
; CHECK-NEXT: pand {{.*}}(%rip), [[SRC1]]
; CHECK-NEXT: pcmpgtb [[SRC1]], [[SRC2:%xmm[0-9]+]]
; CHECK-NEXT: pand %xmm{{[0-9]+}}, [[SRC2]]
; CHECK-NEXT: movdqa [[SRC2]], [[CPY1:%xmm[0-9]+]]
; CHECK-NEXT: punpcklbw %xmm{{[0-9]+}}, [[CPY1]]
; Check that CPY1 is not redefined.
; CHECK-NOT: , [[CPY1]]
; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY1]]
; CHECK-NEXT: pslld $31, [[CPY1]]
; CHECK-NEXT: psrad $31, [[CPY1]]
; CHECK: punpckhbw %xmm{{[0-9]+}}, [[CPY2:%xmm[0-9]+]]
; Check that CPY2 is not redefined.
; CHECK-NOT: , [[CPY2]]
; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY2]]
; CHECK-NEXT: pslld $31, [[CPY2]]
; CHECK-NEXT: psrad $31, [[CPY2]]
; foo(x): input for the copy-propagation directives that precede this
; function. Several operands are undef (presumably left over from test-case
; reduction - confirm before touching them); the expected instruction sequence
; depends on this exact IR, so do not simplify or reorder it.
define <16 x float> @foo(<16 x float> %x) {
bb:
; Per-lane mask from a signed compare of an undef vector against zero.
%v3 = icmp slt <16 x i32> undef, zeroinitializer
; Same mask widened to i32 lanes holding 0 or 1.
%v14 = zext <16 x i1> %v3 to <16 x i32>
; Per-lane mask of x < 0.0 (ordered compare).
%v16 = fcmp olt <16 x float> %x, zeroinitializer
; The x < 0.0 mask widened twice: sign-extended (0 or all-ones per lane) and
; zero-extended (0 or 1 per lane).
%v17 = sext <16 x i1> %v16 to <16 x i32>
%v18 = zext <16 x i1> %v16 to <16 x i32>
%v19 = xor <16 x i32> %v14, %v18
%v20 = or <16 x i32> %v17, undef
; Round-trip x through a signed 32-bit integer conversion.
%v21 = fptosi <16 x float> %x to <16 x i32>
%v22 = sitofp <16 x i32> %v21 to <16 x float>
; Lanes where the round-tripped value is > 0.0 and the %v3 mask is also set.
%v69 = fcmp ogt <16 x float> %v22, zeroinitializer
%v75 = and <16 x i1> %v69, %v3
; Reinterpret the round-tripped floats as raw 32-bit integers.
%v77 = bitcast <16 x float> %v22 to <16 x i32>
%v79 = sext <16 x i1> %v75 to <16 x i32>
%v80 = and <16 x i32> undef, %v79
%v81 = xor <16 x i32> %v77, %v80
%v82 = and <16 x i32> undef, %v81
; xor/and/xor bit-select: %v85 takes the bits of %v82 where %v20 is set and the
; bits of %v19 elsewhere.
%v83 = xor <16 x i32> %v19, %v82
%v84 = and <16 x i32> %v83, %v20
%v85 = xor <16 x i32> %v19, %v84
; Return the selected bits reinterpreted as floats.
%v86 = bitcast <16 x i32> %v85 to <16 x float>
ret <16 x float> %v86
}