1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[x86] Refactor the tests for popcnt.

Extracted from the D6531 patch by Bruno Cardoso Lopes, and re-generated
to reflect the current state of the world. This should let Bruno's D6531
actually show the delta between the approaches by running the x86 test
case update script after re-building.

llvm-svn: 238391
This commit is contained in:
Chandler Carruth 2015-05-28 02:40:15 +00:00
parent 4193c5f761
commit 521fdf1acb
3 changed files with 2902 additions and 159 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,159 +0,0 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck -check-prefix=AVX2 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
; Vector version of:
; v = v - ((v >> 1) & 0x55555555)
; v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
; v = (v + (v >> 4)) & 0xF0F0F0F
; v = v + (v >> 8)
; v = v + (v >> 16)
; v = v + (v >> 32) ; i64 only
; test0 - population count of an <8 x i32> vector via @llvm.ctpop.v8i32.
; Verified only by the RUN line that uses the AVX2 check prefix (-mattr=avx2).
; The directives below expect one contiguous instruction sequence that starts
; with a right shift by 1 on a %ymm register and then follows the bit-twiddling
; recipe: shift/mask/subtract, mask/shift-by-2/mask/add, shift-by-4/add/mask,
; shift-by-8/add, shift-by-16/add, and a final mask. Four vpbroadcastd
; instructions are expected in the sequence (presumably materializing the mask
; constants; the operands are not checked).
define <8 x i32> @test0(<8 x i32> %x) {
; AVX2-LABEL: @test0
entry:
; AVX2: vpsrld $1, %ymm
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsubd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsrld $2
; AVX2-NEXT: vpand
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpsrld $4
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsrld $8
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpsrld $16
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
%y = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %x)
ret <8 x i32> %y
}
; test1 - population count of a <4 x i64> vector via @llvm.ctpop.v4i64.
; Verified only by the RUN line that uses the AVX2-NOPOPCNT check prefix
; (-mattr=avx2 -mattr=-popcnt).
; The expected sequence starts with a right shift by 1 on a %ymm register and
; uses the quadword forms (vpsrlq / vpsubq / vpaddq / vpbroadcastq). Compared
; with the 32-bit element tests it has one extra shift-by-32/add step before
; the final mask, matching the "i64 only" line of the algorithm comment at the
; top of the file.
define <4 x i64> @test1(<4 x i64> %x) {
; AVX2-NOPOPCNT-LABEL: @test1
entry:
; AVX2-NOPOPCNT: vpsrlq $1, %ymm
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
%y = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
ret <4 x i64> %y
}
; test2 - population count of a <4 x i32> vector via @llvm.ctpop.v4i32.
; Verified by both popcnt-disabled RUN lines (the AVX2-NOPOPCNT and
; AVX1-NOPOPCNT check prefixes). Both expected sequences start with a right
; shift by 1 on an %xmm register and follow the same shift/mask/add recipe.
; They differ in the instructions interleaved with the arithmetic:
;   - the avx2 expectations contain four vpbroadcastd instructions;
;   - the avx expectations contain no broadcasts and a single vmovdqa
;     (presumably a constant load; the operands are not checked).
; The two groups of directives are independent: each RUN line only looks at
; the directives carrying its own prefix.
define <4 x i32> @test2(<4 x i32> %x) {
; AVX2-NOPOPCNT-LABEL: @test2
; AVX1-NOPOPCNT-LABEL: @test2
entry:
; AVX2-NOPOPCNT: vpsrld $1, %xmm
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrld $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpsrld $4
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrld $8
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpsrld $16
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT: vpsrld $1, %xmm
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsubd
; AVX1-NOPOPCNT-NEXT: vmovdqa
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrld $2
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpsrld $4
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrld $8
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpsrld $16
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpand
%y = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
ret <4 x i32> %y
}
; test3 - population count of a <2 x i64> vector via @llvm.ctpop.v2i64.
; Verified by both popcnt-disabled RUN lines (the AVX2-NOPOPCNT and
; AVX1-NOPOPCNT check prefixes). The two expected mnemonic sequences are
; identical: quadword shift/mask/add steps on an %xmm register, including the
; extra shift-by-32/add step for 64-bit elements, with a single vmovdqa and no
; vpbroadcastq in either configuration (the vmovdqa is presumably a constant
; load; the operands are not checked).
define <2 x i64> @test3(<2 x i64> %x) {
; AVX2-NOPOPCNT-LABEL: @test3
; AVX1-NOPOPCNT-LABEL: @test3
entry:
; AVX2-NOPOPCNT: vpsrlq $1, %xmm
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubq
; AVX2-NOPOPCNT-NEXT: vmovdqa
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT: vpsrlq $1, %xmm
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsubq
; AVX1-NOPOPCNT-NEXT: vmovdqa
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrlq $2
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $4
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrlq $8
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $16
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $32
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpand
%y = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
ret <2 x i64> %y
}
; Vector population-count intrinsics called by the tests above, listed in the
; same order as the tests that use them (test0 through test3).
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)