1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 03:23:01 +02:00

[X86][SSE] Add extractps/pextrd equivalence to domain tables

Differential Revision: https://reviews.llvm.org/D39135

llvm-svn: 316274
This commit is contained in:
Simon Pilgrim 2017-10-21 20:19:48 +00:00
parent c7d811bede
commit b78c13ea43
14 changed files with 87 additions and 91 deletions

View File

@@ -9451,6 +9451,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
 { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
 { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
 { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
+{ X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
+{ X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
 // AVX 128-bit support
 { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
 { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
@@ -9479,6 +9481,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
 { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
 { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
 { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
+{ X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
+{ X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
 // AVX 256-bit support
 { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
 { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
@@ -9577,6 +9581,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
 { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
 { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
 { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
+{ X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr },
+{ X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr },
 };
 
 static const uint16_t ReplaceableInstrsAVX2[][3] = {

View File

@@ -49,9 +49,9 @@ entry:
 define void @zero_test() {
 ; X32-LABEL: zero_test:
 ; X32: # BB#0: # %entry
-; X32-NEXT: pxor %xmm0, %xmm0
-; X32-NEXT: pextrd $1, %xmm0, (%eax)
-; X32-NEXT: movd %xmm0, (%eax)
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: extractps $1, %xmm0, (%eax)
+; X32-NEXT: movss %xmm0, (%eax)
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: zero_test:

View File

@@ -10,12 +10,12 @@
 define <4 x i32> @test(<4 x i32>* %p) {
 ; CHECK-LABEL: test:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movdqa (%rdi), %xmm0
-; CHECK-NEXT: pextrd $2, %xmm0, %eax
+; CHECK-NEXT: movaps (%rdi), %xmm0
+; CHECK-NEXT: extractps $2, %xmm0, %eax
 ; CHECK-NEXT: cmpl $3, %eax
 ; CHECK-NEXT: je .LBB0_2
 ; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: .LBB0_2:
 ; CHECK-NEXT: retq
 %v = load <4 x i32>, <4 x i32>* %p

View File

@@ -792,14 +792,14 @@ define i32 @test_mm256_extract_epi32(<4 x i64> %a0) nounwind {
 ; X32-LABEL: test_mm256_extract_epi32:
 ; X32: # BB#0:
 ; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT: vpextrd $1, %xmm0, %eax
+; X32-NEXT: vextractps $1, %xmm0, %eax
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_extract_epi32:
 ; X64: # BB#0:
 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vpextrd $1, %xmm0, %eax
+; X64-NEXT: vextractps $1, %xmm0, %eax
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
@@ -811,8 +811,8 @@ define i64 @test_mm256_extract_epi64(<4 x i64> %a0) nounwind {
 ; X32-LABEL: test_mm256_extract_epi64:
 ; X32: # BB#0:
 ; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT: vpextrd $2, %xmm0, %eax
-; X32-NEXT: vpextrd $3, %xmm0, %edx
+; X32-NEXT: vextractps $2, %xmm0, %eax
+; X32-NEXT: vextractps $3, %xmm0, %edx
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;

View File

@@ -424,9 +424,9 @@ define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
 define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
 ; CHECK-LABEL: extract_v16i32:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
+; CHECK-NEXT: vextractps $1, %xmm0, %eax
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %r1 = extractelement <16 x i32> %x, i32 1
@@ -438,9 +438,9 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
 define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
 ; CHECK-LABEL: extract_v8i32:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
+; CHECK-NEXT: vextractps $1, %xmm0, %eax
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %r1 = extractelement <8 x i32> %x, i32 1
@@ -452,8 +452,8 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
 define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
 ; CHECK-LABEL: extract_v4i32:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-NEXT: vpextrd $3, %xmm0, (%rdi)
+; CHECK-NEXT: vextractps $1, %xmm0, %eax
+; CHECK-NEXT: vextractps $3, %xmm0, (%rdi)
 ; CHECK-NEXT: retq
 %r1 = extractelement <4 x i32> %x, i32 1
 %r2 = extractelement <4 x i32> %x, i32 3

View File

@@ -285,23 +285,23 @@ define void @extract_i32_3(i32* nocapture %dst, <4 x i32> %foo) nounwind {
 ; SSE41-X32-LABEL: extract_i32_3:
 ; SSE41-X32: # BB#0:
 ; SSE41-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE41-X32-NEXT: pextrd $3, %xmm0, (%eax)
+; SSE41-X32-NEXT: extractps $3, %xmm0, (%eax)
 ; SSE41-X32-NEXT: retl
 ;
 ; SSE41-X64-LABEL: extract_i32_3:
 ; SSE41-X64: # BB#0:
-; SSE41-X64-NEXT: pextrd $3, %xmm0, (%rdi)
+; SSE41-X64-NEXT: extractps $3, %xmm0, (%rdi)
 ; SSE41-X64-NEXT: retq
 ;
 ; AVX-X32-LABEL: extract_i32_3:
 ; AVX-X32: # BB#0:
 ; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-X32-NEXT: vpextrd $3, %xmm0, (%eax)
+; AVX-X32-NEXT: vextractps $3, %xmm0, (%eax)
 ; AVX-X32-NEXT: retl
 ;
 ; AVX-X64-LABEL: extract_i32_3:
 ; AVX-X64: # BB#0:
-; AVX-X64-NEXT: vpextrd $3, %xmm0, (%rdi)
+; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi)
 ; AVX-X64-NEXT: retq
 ;
 ; SSE-F128-LABEL: extract_i32_3:

View File

@@ -231,12 +231,12 @@ define i32 @extractelement_v4i32_3(<4 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: extractelement_v4i32_3:
 ; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: extractps $3, %xmm0, %eax
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extractelement_v4i32_3:
 ; AVX: # BB#0:
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: vextractps $3, %xmm0, %eax
 ; AVX-NEXT: retq
 %b = extractelement <4 x i32> %a, i256 3
 ret i32 %b
@@ -297,22 +297,15 @@ define i32 @extractelement_v8i32_7(<8 x i32> %a) nounwind {
 ;
 ; SSE41-LABEL: extractelement_v8i32_7:
 ; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $3, %xmm1, %eax
+; SSE41-NEXT: extractps $3, %xmm1, %eax
 ; SSE41-NEXT: retq
 ;
-; AVX1-LABEL: extractelement_v8i32_7:
-; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpextrd $3, %xmm0, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: extractelement_v8i32_7:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpextrd $3, %xmm0, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: extractelement_v8i32_7:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vextractps $3, %xmm0, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
 %b = extractelement <8 x i32> %a, i64 7
 ret i32 %b
 }

View File

@@ -76,7 +76,7 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind {
 ; X32-LABEL: signbits_ashr_extract_sitofp:
 ; X32: # BB#0:
 ; X32-NEXT: pushl %eax
-; X32-NEXT: vpextrd $1, %xmm0, %eax
+; X32-NEXT: vextractps $1, %xmm0, %eax
 ; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X32-NEXT: vmovss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)

View File

@@ -541,19 +541,19 @@ define void @test_extract_i32(<4 x i32> %arg, i32* %dst) {
 ;
 ; SSE41-LABEL: test_extract_i32:
 ; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: extractps $1, %xmm0, %eax
 ; SSE41-NEXT: movntil %eax, (%rdi)
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: test_extract_i32:
 ; AVX: # BB#0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vextractps $1, %xmm0, %eax
 ; AVX-NEXT: movntil %eax, (%rdi)
 ; AVX-NEXT: retq
 ;
 ; VLX-LABEL: test_extract_i32:
 ; VLX: # BB#0:
-; VLX-NEXT: vpextrd $1, %xmm0, %eax
+; VLX-NEXT: vextractps $1, %xmm0, %eax
 ; VLX-NEXT: movntil %eax, (%rdi)
 ; VLX-NEXT: retq
 %1 = extractelement <4 x i32> %arg, i32 1

View File

@@ -112,10 +112,10 @@ define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
 ;
 ; AVX2-LABEL: v3i32:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovq %xmm1, (%rdi)
+; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; AVX2-NEXT: vmovlps %xmm1, (%rdi)
 ; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: v3i32:
@@ -199,18 +199,18 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi)
 ; AVX1-NEXT: vmovaps %xmm1, (%rdi)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v5i32:
 ; AVX2: # BB#0:
 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
-; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpextrd $3, %xmm0, 16(%rdi)
-; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
+; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi)
+; AVX2-NEXT: vmovaps %xmm1, (%rdi)
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
@@ -218,7 +218,7 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind {
 ; XOP: # BB#0:
 ; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
 ; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; XOP-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; XOP-NEXT: vextractps $3, %xmm0, 16(%rdi)
 ; XOP-NEXT: vmovaps %xmm1, (%rdi)
 ; XOP-NEXT: retq
 %r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> <i32 0, i32 5, i32 1, i32 6, i32 3>

View File

@@ -440,12 +440,12 @@ define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
 define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
 ; X32-LABEL: test_mm_extract_epi32:
 ; X32: # BB#0:
-; X32-NEXT: pextrd $1, %xmm0, %eax
+; X32-NEXT: extractps $1, %xmm0, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_extract_epi32:
 ; X64: # BB#0:
-; X64-NEXT: pextrd $1, %xmm0, %eax
+; X64-NEXT: extractps $1, %xmm0, %eax
 ; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %ext = extractelement <4 x i32> %arg0, i32 1
@@ -455,8 +455,8 @@ define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
 define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
 ; X32-LABEL: test_mm_extract_epi64:
 ; X32: # BB#0:
-; X32-NEXT: pextrd $2, %xmm0, %eax
-; X32-NEXT: pextrd $3, %xmm0, %edx
+; X32-NEXT: extractps $2, %xmm0, %eax
+; X32-NEXT: extractps $3, %xmm0, %edx
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_extract_epi64:

View File

@@ -949,61 +949,71 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
 define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_pextrd:
 ; GENERIC: # BB#0:
+; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrd:
 ; SLM: # BB#0:
+; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00]
 ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT: retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pextrd:
 ; SANDY: # BB#0:
+; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT: retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrd:
 ; HASWELL: # BB#0:
+; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT: retq # sched: [2:1.00]
 ;
 ; BROADWELL-LABEL: test_pextrd:
 ; BROADWELL: # BB#0:
+; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
 ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT: retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_pextrd:
 ; SKYLAKE: # BB#0:
+; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pextrd:
 ; SKX: # BB#0:
+; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pextrd:
 ; BTVER2: # BB#0:
+; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pextrd:
 ; ZNVER1: # BB#0:
+; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
 ; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00]
 ; ZNVER1-NEXT: retq # sched: [1:0.50]
-%1 = extractelement <4 x i32> %a0, i32 3
-%2 = extractelement <4 x i32> %a0, i32 1
-store i32 %2, i32 *%a1
-ret i32 %1
+%1 = add <4 x i32> %a0, %a0
+%2 = extractelement <4 x i32> %1, i32 3
+%3 = extractelement <4 x i32> %1, i32 1
+store i32 %3, i32 *%a1
+ret i32 %2
 }
 
 define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {

View File

@@ -108,6 +108,7 @@ define float @ext_1(<4 x float> %v) nounwind {
 %t = fadd float %s, 1.0
 ret float %t
 }
+
 define float @ext_2(<4 x float> %v) nounwind {
 ; X32-LABEL: ext_2:
 ; X32: ## BB#0:
@@ -125,15 +126,16 @@ define float @ext_2(<4 x float> %v) nounwind {
 %s = extractelement <4 x float> %v, i32 3
 ret float %s
 }
+
 define i32 @ext_3(<4 x i32> %v) nounwind {
 ; X32-LABEL: ext_3:
 ; X32: ## BB#0:
-; X32-NEXT: pextrd $3, %xmm0, %eax
+; X32-NEXT: extractps $3, %xmm0, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: ext_3:
 ; X64: ## BB#0:
-; X64-NEXT: pextrd $3, %xmm0, %eax
+; X64-NEXT: extractps $3, %xmm0, %eax
 ; X64-NEXT: retq
 %i = extractelement <4 x i32> %v, i32 3
 ret i32 %i
@@ -261,7 +263,6 @@ define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
 ret i32 %tmp1
 }
 
-
 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone

View File

@@ -25,33 +25,19 @@ define <7 x i64> @load7_aligned(<7 x i64>* %x) {
 ; X86-SSE-NEXT: movaps %xmm0, (%eax)
 ; X86-SSE-NEXT: retl $4
 ;
-; X86-AVX1-LABEL: load7_aligned:
-; X86-AVX1: # BB#0:
-; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX1-NEXT: vmovaps (%ecx), %ymm0
-; X86-AVX1-NEXT: vmovaps 32(%ecx), %ymm1
-; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
-; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; X86-AVX1-NEXT: vpextrd $1, %xmm0, 52(%eax)
-; X86-AVX1-NEXT: vmovd %xmm0, 48(%eax)
-; X86-AVX1-NEXT: vmovaps %xmm1, 32(%eax)
-; X86-AVX1-NEXT: vzeroupper
-; X86-AVX1-NEXT: retl $4
-;
-; X86-AVX2-LABEL: load7_aligned:
-; X86-AVX2: # BB#0:
-; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX2-NEXT: vmovaps (%ecx), %ymm0
-; X86-AVX2-NEXT: vmovdqa 32(%ecx), %ymm1
-; X86-AVX2-NEXT: vmovaps %ymm0, (%eax)
-; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
-; X86-AVX2-NEXT: vpextrd $1, %xmm0, 52(%eax)
-; X86-AVX2-NEXT: vmovd %xmm0, 48(%eax)
-; X86-AVX2-NEXT: vmovdqa %xmm1, 32(%eax)
-; X86-AVX2-NEXT: vzeroupper
-; X86-AVX2-NEXT: retl $4
+; X86-AVX-LABEL: load7_aligned:
+; X86-AVX: # BB#0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT: vmovaps (%ecx), %ymm0
+; X86-AVX-NEXT: vmovaps 32(%ecx), %ymm1
+; X86-AVX-NEXT: vmovaps %ymm0, (%eax)
+; X86-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0
+; X86-AVX-NEXT: vextractps $1, %xmm0, 52(%eax)
+; X86-AVX-NEXT: vmovss %xmm0, 48(%eax)
+; X86-AVX-NEXT: vmovaps %xmm1, 32(%eax)
+; X86-AVX-NEXT: vzeroupper
+; X86-AVX-NEXT: retl $4
 ;
 ; X64-SSE-LABEL: load7_aligned:
 ; X64-SSE: # BB#0: