mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86][SSE] Add extractps/pextrd equivalence to domain tables
Differential Revision: https://reviews.llvm.org/D39135
llvm-svn: 316274
This commit is contained in:
parent
c7d811bede
commit
b78c13ea43
@ -9451,6 +9451,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
|
||||
{ X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
|
||||
{ X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
|
||||
{ X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
|
||||
{ X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
|
||||
{ X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
|
||||
// AVX 128-bit support
|
||||
{ X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
|
||||
{ X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
|
||||
@ -9479,6 +9481,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
|
||||
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
|
||||
{ X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
|
||||
{ X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
|
||||
{ X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
|
||||
{ X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
|
||||
// AVX 256-bit support
|
||||
{ X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
|
||||
{ X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
|
||||
@ -9577,6 +9581,8 @@ static const uint16_t ReplaceableInstrs[][3] = {
|
||||
{ X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
|
||||
{ X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
|
||||
{ X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
|
||||
{ X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr },
|
||||
{ X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr },
|
||||
};
|
||||
|
||||
static const uint16_t ReplaceableInstrsAVX2[][3] = {
|
||||
|
@ -49,9 +49,9 @@ entry:
|
||||
define void @zero_test() {
|
||||
; X32-LABEL: zero_test:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: pxor %xmm0, %xmm0
|
||||
; X32-NEXT: pextrd $1, %xmm0, (%eax)
|
||||
; X32-NEXT: movd %xmm0, (%eax)
|
||||
; X32-NEXT: xorps %xmm0, %xmm0
|
||||
; X32-NEXT: extractps $1, %xmm0, (%eax)
|
||||
; X32-NEXT: movss %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: zero_test:
|
||||
|
@ -10,12 +10,12 @@
|
||||
define <4 x i32> @test(<4 x i32>* %p) {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movdqa (%rdi), %xmm0
|
||||
; CHECK-NEXT: pextrd $2, %xmm0, %eax
|
||||
; CHECK-NEXT: movaps (%rdi), %xmm0
|
||||
; CHECK-NEXT: extractps $2, %xmm0, %eax
|
||||
; CHECK-NEXT: cmpl $3, %eax
|
||||
; CHECK-NEXT: je .LBB0_2
|
||||
; CHECK-NEXT: # BB#1:
|
||||
; CHECK-NEXT: pxor %xmm0, %xmm0
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: .LBB0_2:
|
||||
; CHECK-NEXT: retq
|
||||
%v = load <4 x i32>, <4 x i32>* %p
|
||||
|
@ -792,14 +792,14 @@ define i32 @test_mm256_extract_epi32(<4 x i64> %a0) nounwind {
|
||||
; X32-LABEL: test_mm256_extract_epi32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X32-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; X32-NEXT: vextractps $1, %xmm0, %eax
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_extract_epi32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X64-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; X64-NEXT: vextractps $1, %xmm0, %eax
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
|
||||
@ -811,8 +811,8 @@ define i64 @test_mm256_extract_epi64(<4 x i64> %a0) nounwind {
|
||||
; X32-LABEL: test_mm256_extract_epi64:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X32-NEXT: vpextrd $2, %xmm0, %eax
|
||||
; X32-NEXT: vpextrd $3, %xmm0, %edx
|
||||
; X32-NEXT: vextractps $2, %xmm0, %eax
|
||||
; X32-NEXT: vextractps $3, %xmm0, %edx
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
@ -424,9 +424,9 @@ define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
|
||||
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
|
||||
; CHECK-LABEL: extract_v16i32:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, %eax
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
%r1 = extractelement <16 x i32> %x, i32 1
|
||||
@ -438,9 +438,9 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
|
||||
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
|
||||
; CHECK-LABEL: extract_v8i32:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, %eax
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
%r1 = extractelement <8 x i32> %x, i32 1
|
||||
@ -452,8 +452,8 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
|
||||
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
|
||||
; CHECK-LABEL: extract_v4i32:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; CHECK-NEXT: vpextrd $3, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: vextractps $1, %xmm0, %eax
|
||||
; CHECK-NEXT: vextractps $3, %xmm0, (%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%r1 = extractelement <4 x i32> %x, i32 1
|
||||
%r2 = extractelement <4 x i32> %x, i32 3
|
||||
|
@ -285,23 +285,23 @@ define void @extract_i32_3(i32* nocapture %dst, <4 x i32> %foo) nounwind {
|
||||
; SSE41-X32-LABEL: extract_i32_3:
|
||||
; SSE41-X32: # BB#0:
|
||||
; SSE41-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SSE41-X32-NEXT: pextrd $3, %xmm0, (%eax)
|
||||
; SSE41-X32-NEXT: extractps $3, %xmm0, (%eax)
|
||||
; SSE41-X32-NEXT: retl
|
||||
;
|
||||
; SSE41-X64-LABEL: extract_i32_3:
|
||||
; SSE41-X64: # BB#0:
|
||||
; SSE41-X64-NEXT: pextrd $3, %xmm0, (%rdi)
|
||||
; SSE41-X64-NEXT: extractps $3, %xmm0, (%rdi)
|
||||
; SSE41-X64-NEXT: retq
|
||||
;
|
||||
; AVX-X32-LABEL: extract_i32_3:
|
||||
; AVX-X32: # BB#0:
|
||||
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX-X32-NEXT: vpextrd $3, %xmm0, (%eax)
|
||||
; AVX-X32-NEXT: vextractps $3, %xmm0, (%eax)
|
||||
; AVX-X32-NEXT: retl
|
||||
;
|
||||
; AVX-X64-LABEL: extract_i32_3:
|
||||
; AVX-X64: # BB#0:
|
||||
; AVX-X64-NEXT: vpextrd $3, %xmm0, (%rdi)
|
||||
; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi)
|
||||
; AVX-X64-NEXT: retq
|
||||
;
|
||||
; SSE-F128-LABEL: extract_i32_3:
|
||||
|
@ -231,12 +231,12 @@ define i32 @extractelement_v4i32_3(<4 x i32> %a) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: extractelement_v4i32_3:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pextrd $3, %xmm0, %eax
|
||||
; SSE41-NEXT: extractps $3, %xmm0, %eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extractelement_v4i32_3:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpextrd $3, %xmm0, %eax
|
||||
; AVX-NEXT: vextractps $3, %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
%b = extractelement <4 x i32> %a, i256 3
|
||||
ret i32 %b
|
||||
@ -297,22 +297,15 @@ define i32 @extractelement_v8i32_7(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: extractelement_v8i32_7:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pextrd $3, %xmm1, %eax
|
||||
; SSE41-NEXT: extractps $3, %xmm1, %eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: extractelement_v8i32_7:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: extractelement_v8i32_7:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
; AVX-LABEL: extractelement_v8i32_7:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX-NEXT: vextractps $3, %xmm0, %eax
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
%b = extractelement <8 x i32> %a, i64 7
|
||||
ret i32 %b
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind {
|
||||
; X32-LABEL: signbits_ashr_extract_sitofp:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; X32-NEXT: vextractps $1, %xmm0, %eax
|
||||
; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
|
||||
; X32-NEXT: vmovss %xmm0, (%esp)
|
||||
; X32-NEXT: flds (%esp)
|
||||
|
@ -541,19 +541,19 @@ define void @test_extract_i32(<4 x i32> %arg, i32* %dst) {
|
||||
;
|
||||
; SSE41-LABEL: test_extract_i32:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pextrd $1, %xmm0, %eax
|
||||
; SSE41-NEXT: extractps $1, %xmm0, %eax
|
||||
; SSE41-NEXT: movntil %eax, (%rdi)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_extract_i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; AVX-NEXT: vextractps $1, %xmm0, %eax
|
||||
; AVX-NEXT: movntil %eax, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; VLX-LABEL: test_extract_i32:
|
||||
; VLX: # BB#0:
|
||||
; VLX-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; VLX-NEXT: vextractps $1, %xmm0, %eax
|
||||
; VLX-NEXT: movntil %eax, (%rdi)
|
||||
; VLX-NEXT: retq
|
||||
%1 = extractelement <4 x i32> %arg, i32 1
|
||||
|
@ -112,10 +112,10 @@ define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: v3i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi)
|
||||
; AVX2-NEXT: vmovq %xmm1, (%rdi)
|
||||
; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
|
||||
; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
|
||||
; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi)
|
||||
; AVX2-NEXT: vmovlps %xmm1, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: v3i32:
|
||||
@ -199,18 +199,18 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind {
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
|
||||
; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi)
|
||||
; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi)
|
||||
; AVX1-NEXT: vmovaps %xmm1, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: v5i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
|
||||
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; AVX2-NEXT: vpextrd $3, %xmm0, 16(%rdi)
|
||||
; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
|
||||
; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
|
||||
; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi)
|
||||
; AVX2-NEXT: vmovaps %xmm1, (%rdi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -218,7 +218,7 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) nounwind {
|
||||
; XOP: # BB#0:
|
||||
; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
|
||||
; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
|
||||
; XOP-NEXT: vpextrd $3, %xmm0, 16(%rdi)
|
||||
; XOP-NEXT: vextractps $3, %xmm0, 16(%rdi)
|
||||
; XOP-NEXT: vmovaps %xmm1, (%rdi)
|
||||
; XOP-NEXT: retq
|
||||
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> <i32 0, i32 5, i32 1, i32 6, i32 3>
|
||||
|
@ -440,12 +440,12 @@ define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
|
||||
define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
|
||||
; X32-LABEL: test_mm_extract_epi32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pextrd $1, %xmm0, %eax
|
||||
; X32-NEXT: extractps $1, %xmm0, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_extract_epi32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: pextrd $1, %xmm0, %eax
|
||||
; X64-NEXT: extractps $1, %xmm0, %eax
|
||||
; X64-NEXT: retq
|
||||
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
|
||||
%ext = extractelement <4 x i32> %arg0, i32 1
|
||||
@ -455,8 +455,8 @@ define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
|
||||
define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
|
||||
; X32-LABEL: test_mm_extract_epi64:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pextrd $2, %xmm0, %eax
|
||||
; X32-NEXT: pextrd $3, %xmm0, %edx
|
||||
; X32-NEXT: extractps $2, %xmm0, %eax
|
||||
; X32-NEXT: extractps $3, %xmm0, %edx
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_extract_epi64:
|
||||
|
@ -949,61 +949,71 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
|
||||
define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
|
||||
; GENERIC-LABEL: test_pextrd:
|
||||
; GENERIC: # BB#0:
|
||||
; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
|
||||
; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SLM-LABEL: test_pextrd:
|
||||
; SLM: # BB#0:
|
||||
; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00]
|
||||
; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
|
||||
; SLM-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: test_pextrd:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
|
||||
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: test_pextrd:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
|
||||
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||
;
|
||||
; BROADWELL-LABEL: test_pextrd:
|
||||
; BROADWELL: # BB#0:
|
||||
; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
|
||||
; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||
; BROADWELL-NEXT: retq # sched: [2:1.00]
|
||||
;
|
||||
; SKYLAKE-LABEL: test_pextrd:
|
||||
; SKYLAKE: # BB#0:
|
||||
; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKX-LABEL: test_pextrd:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
|
||||
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; BTVER2-LABEL: test_pextrd:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
|
||||
; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; ZNVER1-LABEL: test_pextrd:
|
||||
; ZNVER1: # BB#0:
|
||||
; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
|
||||
; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.25]
|
||||
; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00]
|
||||
; ZNVER1-NEXT: retq # sched: [1:0.50]
|
||||
%1 = extractelement <4 x i32> %a0, i32 3
|
||||
%2 = extractelement <4 x i32> %a0, i32 1
|
||||
store i32 %2, i32 *%a1
|
||||
ret i32 %1
|
||||
%1 = add <4 x i32> %a0, %a0
|
||||
%2 = extractelement <4 x i32> %1, i32 3
|
||||
%3 = extractelement <4 x i32> %1, i32 1
|
||||
store i32 %3, i32 *%a1
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
|
||||
|
@ -108,6 +108,7 @@ define float @ext_1(<4 x float> %v) nounwind {
|
||||
%t = fadd float %s, 1.0
|
||||
ret float %t
|
||||
}
|
||||
|
||||
define float @ext_2(<4 x float> %v) nounwind {
|
||||
; X32-LABEL: ext_2:
|
||||
; X32: ## BB#0:
|
||||
@ -125,15 +126,16 @@ define float @ext_2(<4 x float> %v) nounwind {
|
||||
%s = extractelement <4 x float> %v, i32 3
|
||||
ret float %s
|
||||
}
|
||||
|
||||
define i32 @ext_3(<4 x i32> %v) nounwind {
|
||||
; X32-LABEL: ext_3:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: pextrd $3, %xmm0, %eax
|
||||
; X32-NEXT: extractps $3, %xmm0, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: ext_3:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: pextrd $3, %xmm0, %eax
|
||||
; X64-NEXT: extractps $3, %xmm0, %eax
|
||||
; X64-NEXT: retq
|
||||
%i = extractelement <4 x i32> %v, i32 3
|
||||
ret i32 %i
|
||||
@ -261,7 +263,6 @@ define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
|
||||
ret i32 %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
@ -25,33 +25,19 @@ define <7 x i64> @load7_aligned(<7 x i64>* %x) {
|
||||
; X86-SSE-NEXT: movaps %xmm0, (%eax)
|
||||
; X86-SSE-NEXT: retl $4
|
||||
;
|
||||
; X86-AVX1-LABEL: load7_aligned:
|
||||
; X86-AVX1: # BB#0:
|
||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-AVX1-NEXT: vmovaps (%ecx), %ymm0
|
||||
; X86-AVX1-NEXT: vmovaps 32(%ecx), %ymm1
|
||||
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; X86-AVX1-NEXT: vpextrd $1, %xmm0, 52(%eax)
|
||||
; X86-AVX1-NEXT: vmovd %xmm0, 48(%eax)
|
||||
; X86-AVX1-NEXT: vmovaps %xmm1, 32(%eax)
|
||||
; X86-AVX1-NEXT: vzeroupper
|
||||
; X86-AVX1-NEXT: retl $4
|
||||
;
|
||||
; X86-AVX2-LABEL: load7_aligned:
|
||||
; X86-AVX2: # BB#0:
|
||||
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-AVX2-NEXT: vmovaps (%ecx), %ymm0
|
||||
; X86-AVX2-NEXT: vmovdqa 32(%ecx), %ymm1
|
||||
; X86-AVX2-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; X86-AVX2-NEXT: vpextrd $1, %xmm0, 52(%eax)
|
||||
; X86-AVX2-NEXT: vmovd %xmm0, 48(%eax)
|
||||
; X86-AVX2-NEXT: vmovdqa %xmm1, 32(%eax)
|
||||
; X86-AVX2-NEXT: vzeroupper
|
||||
; X86-AVX2-NEXT: retl $4
|
||||
; X86-AVX-LABEL: load7_aligned:
|
||||
; X86-AVX: # BB#0:
|
||||
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-AVX-NEXT: vmovaps (%ecx), %ymm0
|
||||
; X86-AVX-NEXT: vmovaps 32(%ecx), %ymm1
|
||||
; X86-AVX-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X86-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; X86-AVX-NEXT: vextractps $1, %xmm0, 52(%eax)
|
||||
; X86-AVX-NEXT: vmovss %xmm0, 48(%eax)
|
||||
; X86-AVX-NEXT: vmovaps %xmm1, 32(%eax)
|
||||
; X86-AVX-NEXT: vzeroupper
|
||||
; X86-AVX-NEXT: retl $4
|
||||
;
|
||||
; X64-SSE-LABEL: load7_aligned:
|
||||
; X64-SSE: # BB#0:
|
||||
|
Loading…
x
Reference in New Issue
Block a user