1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/X86/sse3.ll
Simon Pilgrim 915cb13dec [SelectionDAG] Enable SimplifyDemandedVectorElts support for simplifying shuffle masks
Based off the DemandedElts mask the and UNDEF elements returned from the SimplifyDemandedVectorElts calls to the shuffle operands, we can attempt to simplify the shuffle mask.

I had to be very conservative here as accepting post-legalized shuffle masks could cause problems for targets that legalize UNDEF mask elements back to inrange values (PowerPC), similarly combining to identity shuffle masks could cause too much UNDEF information to disappear for later combines.

llvm-svn: 325354
2018-02-16 16:22:14 +00:00

418 lines
16 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=X64
; These are tests for SSE3 codegen.
; Test for v8xi16 lowering where we extract the first element of the vector and
; placed it in the second element of the result.
define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
; X86-LABEL: t0:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: movd %edx, %xmm0
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: t0:
; X64: # %bb.0: # %entry
; X64-NEXT: movl $1, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
entry:
%tmp3 = load <8 x i16>, <8 x i16>* %old
%tmp6 = shufflevector <8 x i16> %tmp3,
<8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
store <8 x i16> %tmp6, <8 x i16>* %dest
ret void
}
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; X86-LABEL: t1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,65535]
; X86-NEXT: movaps %xmm0, %xmm1
; X86-NEXT: andnps (%ecx), %xmm1
; X86-NEXT: andps (%eax), %xmm0
; X86-NEXT: orps %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,65535]
; X64-NEXT: movaps %xmm0, %xmm1
; X64-NEXT: andnps (%rsi), %xmm1
; X64-NEXT: andps (%rdi), %xmm0
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp3
}
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t2:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,1,4,5,6,7]
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,1,4,5,6,7]
; X64-NEXT: pandn %xmm1, %xmm2
; X64-NEXT: por %xmm2, %xmm0
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
}
define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t3:
; X86: # %bb.0:
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,5]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT: retl
;
; X64-LABEL: t3:
; X64: # %bb.0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,5]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
ret <8 x i16> %tmp
}
define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t4:
; X86: # %bb.0:
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
; X86-NEXT: retl
;
; X64-LABEL: t4:
; X64: # %bb.0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
ret <8 x i16> %tmp
}
define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t5:
; X86: # %bb.0:
; X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movaps %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t5:
; X64: # %bb.0:
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
ret <8 x i16> %tmp
}
define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t6:
; X86: # %bb.0:
; X86-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-NEXT: retl
;
; X64-LABEL: t6:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
}
define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t7:
; X86: # %bb.0:
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; X86-NEXT: retl
;
; X64-LABEL: t7:
; X64: # %bb.0:
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
ret <8 x i16> %tmp
}
define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
; X86-LABEL: t8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: pshuflw {{.*#+}} xmm0 = mem[2,1,0,3,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: t8:
; X64: # %bb.0:
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[2,1,0,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
%tmp = load <2 x i64>, <2 x i64>* %A
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
%tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
%tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
%tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
%tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
%tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
%tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
%tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
%tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
%tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
ret void
}
define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
; X86-LABEL: t9:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movapd (%ecx), %xmm0
; X86-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-NEXT: movapd %xmm0, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: t9:
; X64: # %bb.0:
; X64-NEXT: movapd (%rdi), %xmm0
; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
%tmp = load <4 x float>, <4 x float>* %r
%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
%tmp.upgrd.4 = load double, double* %tmp.upgrd.3
%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
%tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0
%tmp7 = extractelement <4 x float> %tmp, i32 1
%tmp8 = extractelement <4 x float> %tmp6, i32 0
%tmp9 = extractelement <4 x float> %tmp6, i32 1
%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0
%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
store <4 x float> %tmp13, <4 x float>* %r
ret void
}
; FIXME: This testcase produces icky code. It can be made much better!
; PR2585
@g1 = external constant <4 x i32>
@g2 = external constant <4 x i16>
define void @t10() nounwind {
; X86-LABEL: t10:
; X86: # %bb.0:
; X86-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: movq %xmm0, g2
; X86-NEXT: retl
;
; X64-LABEL: t10:
; X64: # %bb.0:
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: movq %xmm0, {{.*}}(%rip)
; X64-NEXT: retq
load <4 x i32>, <4 x i32>* @g1, align 16
bitcast <4 x i32> %1 to <8 x i16>
shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
bitcast <8 x i16> %3 to <2 x i64>
extractelement <2 x i64> %4, i32 0
bitcast i64 %5 to <4 x i16>
store <4 x i16> %6, <4 x i16>* @g2, align 8
ret void
}
; Pack various elements via shuffles.
define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X86-LABEL: t11:
; X86: # %bb.0: # %entry
; X86-NEXT: psrld $16, %xmm0
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT: retl
;
; X64-LABEL: t11:
; X64: # %bb.0: # %entry
; X64-NEXT: psrld $16, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
entry:
%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp7
}
define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X86-LABEL: t12:
; X86: # %bb.0: # %entry
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X86-NEXT: retl
;
; X64-LABEL: t12:
; X64: # %bb.0: # %entry
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
ret <8 x i16> %tmp9
}
define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X86-LABEL: t13:
; X86: # %bb.0: # %entry
; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X86-NEXT: retl
;
; X64-LABEL: t13:
; X64: # %bb.0: # %entry
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
ret <8 x i16> %tmp9
}
define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X86-LABEL: t14:
; X86: # %bb.0: # %entry
; X86-NEXT: psrlq $16, %xmm0
; X86-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t14:
; X64: # %bb.0: # %entry
; X64-NEXT: psrlq $16, %xmm0
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
ret <8 x i16> %tmp9
}
; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X86-LABEL: t15:
; X86: # %bb.0: # %entry
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X86-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-NEXT: retl
;
; X64-LABEL: t15:
; X64: # %bb.0: # %entry
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
entry:
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp8
}
; Test yonah where we convert a shuffle to pextrw and pinrsw
define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
; X86-LABEL: t16:
; X86: # %bb.0: # %entry
; X86-NEXT: pslld $16, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t16:
; X64: # %bb.0: # %entry
; X64-NEXT: pslld $16, %xmm0
; X64-NEXT: retq
entry:
%tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
%tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
ret <16 x i8> %tmp9
}
; rdar://8520311
define <4 x i32> @t17() nounwind {
; X86-LABEL: t17:
; X86: # %bb.0: # %entry
; X86-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; X86-NEXT: andpd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t17:
; X64: # %bb.0: # %entry
; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: andpd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
entry:
%tmp1 = load <4 x float>, <4 x float>* undef, align 16
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
%tmp3 = load <4 x float>, <4 x float>* undef, align 16
%tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
%tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
%tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
%tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
ret <4 x i32> %tmp7
}