1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[X86] Dropped XOP ctbits checks - they match the AVX checks

llvm-svn: 277718
This commit is contained in:
Simon Pilgrim 2016-08-04 11:04:13 +00:00
parent 70f3672799
commit f889b79772

View File

@ -2,8 +2,8 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX1 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
; Verify the cost of scalar population count instructions.
@ -61,7 +61,6 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; SSE2: Found an estimated cost of 12 for instruction: %ctpop
; SSE42: Found an estimated cost of 7 for instruction: %ctpop
; AVX: Found an estimated cost of 7 for instruction: %ctpop
-; XOP: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %ctpop
}
@ -72,8 +71,6 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; SSE42: Found an estimated cost of 14 for instruction: %ctpop
; AVX1: Found an estimated cost of 14 for instruction: %ctpop
; AVX2: Found an estimated cost of 7 for instruction: %ctpop
-; XOPAVX1: Found an estimated cost of 14 for instruction: %ctpop
-; XOPAVX2: Found an estimated cost of 7 for instruction: %ctpop
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
ret <4 x i64> %ctpop
}
@ -83,7 +80,6 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; SSE2: Found an estimated cost of 15 for instruction: %ctpop
; SSE42: Found an estimated cost of 11 for instruction: %ctpop
; AVX: Found an estimated cost of 11 for instruction: %ctpop
-; XOP: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
ret <4 x i32> %ctpop
}
@ -94,8 +90,6 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; SSE42: Found an estimated cost of 22 for instruction: %ctpop
; AVX1: Found an estimated cost of 22 for instruction: %ctpop
; AVX2: Found an estimated cost of 11 for instruction: %ctpop
-; XOPAVX1: Found an estimated cost of 22 for instruction: %ctpop
-; XOPAVX2: Found an estimated cost of 11 for instruction: %ctpop
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
ret <8 x i32> %ctpop
}
@ -105,7 +99,6 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; SSE2: Found an estimated cost of 13 for instruction: %ctpop
; SSE42: Found an estimated cost of 9 for instruction: %ctpop
; AVX: Found an estimated cost of 9 for instruction: %ctpop
-; XOP: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
ret <8 x i16> %ctpop
}
@ -116,8 +109,6 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; SSE42: Found an estimated cost of 18 for instruction: %ctpop
; AVX1: Found an estimated cost of 18 for instruction: %ctpop
; AVX2: Found an estimated cost of 9 for instruction: %ctpop
-; XOPAVX1: Found an estimated cost of 18 for instruction: %ctpop
-; XOPAVX2: Found an estimated cost of 9 for instruction: %ctpop
%ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
ret <16 x i16> %ctpop
}
@ -127,7 +118,6 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; SSE2: Found an estimated cost of 10 for instruction: %ctpop
; SSE42: Found an estimated cost of 6 for instruction: %ctpop
; AVX: Found an estimated cost of 6 for instruction: %ctpop
-; XOP: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
ret <16 x i8> %ctpop
}
@ -138,8 +128,6 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; SSE42: Found an estimated cost of 12 for instruction: %ctpop
; AVX1: Found an estimated cost of 12 for instruction: %ctpop
; AVX2: Found an estimated cost of 6 for instruction: %ctpop
-; XOPAVX1: Found an estimated cost of 12 for instruction: %ctpop
-; XOPAVX2: Found an estimated cost of 6 for instruction: %ctpop
%ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
ret <32 x i8> %ctpop
}
@ -224,7 +212,6 @@ define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
; SSE2: Found an estimated cost of 6 for instruction: %ctlz
; SSE42: Found an estimated cost of 23 for instruction: %ctlz
; AVX: Found an estimated cost of 23 for instruction: %ctlz
-; XOP: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
ret <2 x i64> %ctlz
}
@ -234,7 +221,6 @@ define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
; SSE2: Found an estimated cost of 6 for instruction: %ctlz
; SSE42: Found an estimated cost of 23 for instruction: %ctlz
; AVX: Found an estimated cost of 23 for instruction: %ctlz
-; XOP: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
ret <2 x i64> %ctlz
}
@ -245,8 +231,6 @@ define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
; SSE42: Found an estimated cost of 46 for instruction: %ctlz
; AVX1: Found an estimated cost of 46 for instruction: %ctlz
; AVX2: Found an estimated cost of 23 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 46 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
ret <4 x i64> %ctlz
}
@ -257,8 +241,6 @@ define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
; SSE42: Found an estimated cost of 46 for instruction: %ctlz
; AVX1: Found an estimated cost of 46 for instruction: %ctlz
; AVX2: Found an estimated cost of 23 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 46 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 23 for instruction: %ctlz
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
ret <4 x i64> %ctlz
}
@ -268,7 +250,6 @@ define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
; SSE2: Found an estimated cost of 12 for instruction: %ctlz
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX: Found an estimated cost of 18 for instruction: %ctlz
-; XOP: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
ret <4 x i32> %ctlz
}
@ -278,7 +259,6 @@ define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
; SSE2: Found an estimated cost of 12 for instruction: %ctlz
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX: Found an estimated cost of 18 for instruction: %ctlz
-; XOP: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
ret <4 x i32> %ctlz
}
@ -289,8 +269,6 @@ define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
; SSE42: Found an estimated cost of 36 for instruction: %ctlz
; AVX1: Found an estimated cost of 36 for instruction: %ctlz
; AVX2: Found an estimated cost of 18 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 36 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
ret <8 x i32> %ctlz
}
@ -301,8 +279,6 @@ define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
; SSE42: Found an estimated cost of 36 for instruction: %ctlz
; AVX1: Found an estimated cost of 36 for instruction: %ctlz
; AVX2: Found an estimated cost of 18 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 36 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 18 for instruction: %ctlz
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
ret <8 x i32> %ctlz
}
@ -312,7 +288,6 @@ define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
; SSE2: Found an estimated cost of 24 for instruction: %ctlz
; SSE42: Found an estimated cost of 14 for instruction: %ctlz
; AVX: Found an estimated cost of 14 for instruction: %ctlz
-; XOP: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %ctlz
}
@ -322,7 +297,6 @@ define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
; SSE2: Found an estimated cost of 24 for instruction: %ctlz
; SSE42: Found an estimated cost of 14 for instruction: %ctlz
; AVX: Found an estimated cost of 14 for instruction: %ctlz
-; XOP: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %ctlz
}
@ -333,8 +307,6 @@ define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
; SSE42: Found an estimated cost of 28 for instruction: %ctlz
; AVX1: Found an estimated cost of 28 for instruction: %ctlz
; AVX2: Found an estimated cost of 14 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 28 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %ctlz
}
@ -345,8 +317,6 @@ define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
; SSE42: Found an estimated cost of 28 for instruction: %ctlz
; AVX1: Found an estimated cost of 28 for instruction: %ctlz
; AVX2: Found an estimated cost of 14 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 28 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 14 for instruction: %ctlz
%ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %ctlz
}
@ -356,7 +326,6 @@ define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
; SSE2: Found an estimated cost of 48 for instruction: %ctlz
; SSE42: Found an estimated cost of 9 for instruction: %ctlz
; AVX: Found an estimated cost of 9 for instruction: %ctlz
-; XOP: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %ctlz
}
@ -366,7 +335,6 @@ define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
; SSE2: Found an estimated cost of 48 for instruction: %ctlz
; SSE42: Found an estimated cost of 9 for instruction: %ctlz
; AVX: Found an estimated cost of 9 for instruction: %ctlz
-; XOP: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %ctlz
}
@ -377,8 +345,6 @@ define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX1: Found an estimated cost of 18 for instruction: %ctlz
; AVX2: Found an estimated cost of 9 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 18 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %ctlz
}
@ -389,8 +355,6 @@ define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
; SSE42: Found an estimated cost of 18 for instruction: %ctlz
; AVX1: Found an estimated cost of 18 for instruction: %ctlz
; AVX2: Found an estimated cost of 9 for instruction: %ctlz
-; XOPAVX1: Found an estimated cost of 18 for instruction: %ctlz
-; XOPAVX2: Found an estimated cost of 9 for instruction: %ctlz
%ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %ctlz
}
@ -475,7 +439,6 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; SSE2: Found an estimated cost of 14 for instruction: %cttz
; SSE42: Found an estimated cost of 10 for instruction: %cttz
; AVX: Found an estimated cost of 10 for instruction: %cttz
-; XOP: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
ret <2 x i64> %cttz
}
@ -485,7 +448,6 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; SSE2: Found an estimated cost of 14 for instruction: %cttz
; SSE42: Found an estimated cost of 10 for instruction: %cttz
; AVX: Found an estimated cost of 10 for instruction: %cttz
-; XOP: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
ret <2 x i64> %cttz
}
@ -496,8 +458,6 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; SSE42: Found an estimated cost of 20 for instruction: %cttz
; AVX1: Found an estimated cost of 20 for instruction: %cttz
; AVX2: Found an estimated cost of 10 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 20 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
ret <4 x i64> %cttz
}
@ -508,8 +468,6 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; SSE42: Found an estimated cost of 20 for instruction: %cttz
; AVX1: Found an estimated cost of 20 for instruction: %cttz
; AVX2: Found an estimated cost of 10 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 20 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 10 for instruction: %cttz
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
ret <4 x i64> %cttz
}
@ -519,7 +477,6 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; SSE2: Found an estimated cost of 18 for instruction: %cttz
; SSE42: Found an estimated cost of 14 for instruction: %cttz
; AVX: Found an estimated cost of 14 for instruction: %cttz
-; XOP: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
ret <4 x i32> %cttz
}
@ -529,7 +486,6 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; SSE2: Found an estimated cost of 18 for instruction: %cttz
; SSE42: Found an estimated cost of 14 for instruction: %cttz
; AVX: Found an estimated cost of 14 for instruction: %cttz
-; XOP: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
ret <4 x i32> %cttz
}
@ -540,8 +496,6 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; SSE42: Found an estimated cost of 28 for instruction: %cttz
; AVX1: Found an estimated cost of 28 for instruction: %cttz
; AVX2: Found an estimated cost of 14 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 28 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
ret <8 x i32> %cttz
}
@ -552,8 +506,6 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; SSE42: Found an estimated cost of 28 for instruction: %cttz
; AVX1: Found an estimated cost of 28 for instruction: %cttz
; AVX2: Found an estimated cost of 14 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 28 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 14 for instruction: %cttz
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
ret <8 x i32> %cttz
}
@ -563,7 +515,6 @@ define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
; SSE2: Found an estimated cost of 16 for instruction: %cttz
; SSE42: Found an estimated cost of 12 for instruction: %cttz
; AVX: Found an estimated cost of 12 for instruction: %cttz
-; XOP: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
ret <8 x i16> %cttz
}
@ -573,7 +524,6 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
; SSE2: Found an estimated cost of 16 for instruction: %cttz
; SSE42: Found an estimated cost of 12 for instruction: %cttz
; AVX: Found an estimated cost of 12 for instruction: %cttz
-; XOP: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
ret <8 x i16> %cttz
}
@ -584,8 +534,6 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; SSE42: Found an estimated cost of 24 for instruction: %cttz
; AVX1: Found an estimated cost of 24 for instruction: %cttz
; AVX2: Found an estimated cost of 12 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 24 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
ret <16 x i16> %cttz
}
@ -596,8 +544,6 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; SSE42: Found an estimated cost of 24 for instruction: %cttz
; AVX1: Found an estimated cost of 24 for instruction: %cttz
; AVX2: Found an estimated cost of 12 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 24 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 12 for instruction: %cttz
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
ret <16 x i16> %cttz
}
@ -607,7 +553,6 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; SSE2: Found an estimated cost of 13 for instruction: %cttz
; SSE42: Found an estimated cost of 9 for instruction: %cttz
; AVX: Found an estimated cost of 9 for instruction: %cttz
-; XOP: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
ret <16 x i8> %cttz
}
@ -617,7 +562,6 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; SSE2: Found an estimated cost of 13 for instruction: %cttz
; SSE42: Found an estimated cost of 9 for instruction: %cttz
; AVX: Found an estimated cost of 9 for instruction: %cttz
-; XOP: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
ret <16 x i8> %cttz
}
@ -628,8 +572,6 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; SSE42: Found an estimated cost of 18 for instruction: %cttz
; AVX1: Found an estimated cost of 18 for instruction: %cttz
; AVX2: Found an estimated cost of 9 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 18 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
ret <32 x i8> %cttz
}
@ -640,8 +582,6 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; SSE42: Found an estimated cost of 18 for instruction: %cttz
; AVX1: Found an estimated cost of 18 for instruction: %cttz
; AVX2: Found an estimated cost of 9 for instruction: %cttz
-; XOPAVX1: Found an estimated cost of 18 for instruction: %cttz
-; XOPAVX2: Found an estimated cost of 9 for instruction: %cttz
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
ret <32 x i8> %cttz
}