diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index e53e40b57e1..888164df75f 100644 --- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -1,9 +1,12 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK -check-prefix=XOP --check-prefix=XOPAVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK -check-prefix=XOP --check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW ; Verify the cost of vector arithmetic shift right instructions. @@ -17,6 +20,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <2 x i64> %a, %b ret <2 x i64> %shift @@ -28,17 +32,31 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, %b ret <4 x i64> %shift } +define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': +; SSE2: Found an estimated cost of 48 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <8 x i64> %a, %b + ret <8 x i64> %shift +} + define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <4 x i32> %a, %b @@ -51,18 +69,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i32> %a, %b ret <8 x i32> %shift } +define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': +; SSE2: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = ashr <16 x i32> %a, %b + ret <16 x i32> %shift +} + define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <8 x i16> %a, %b ret <8 x i16> %shift @@ -74,17 +107,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, %b ret <16 x i16> %shift } +define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <32 x i16> %a, %b + ret <32 x i16> %shift +} + define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8': ; SSE2: Found an estimated cost of 54 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, %b ret <16 x i8> %shift @@ -96,11 +144,26 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE41: Found an estimated cost of 48 for instruction: %shift ; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512F: Found an estimated cost of 24 for instruction: %shift +; AVX512BW: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, %b ret <32 x i8> %shift } +define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': +; SSE2: Found an estimated cost of 216 for instruction: %shift +; SSE41: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX2: Found an estimated cost of 48 for instruction: %shift +; AVX512F: Found an estimated cost of 48 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <64 x i8> %a, %b + ret <64 x i8> %shift +} + ; ; Uniform Variable Shifts ; @@ -111,6 +174,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer %shift = ashr <2 x i64> %a, %splat @@ -123,18 +187,33 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer %shift = ashr <4 x i64> %a, %splat ret <4 x i64> %shift } +define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64': +; SSE2: Found an estimated cost of 48 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer + %shift = ashr <8 x i64> %a, %splat + ret <8 x i64> %shift +} + define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer @@ -148,6 +227,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer @@ -155,12 +235,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shift } +define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': +; SSE2: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer + %shift = ashr <16 x i32> %a, %splat + ret <16 x i32> %shift +} + define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer %shift = ashr <8 x i16> %a, %splat @@ -173,18 +268,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer %shift = ashr <16 x i16> %a, %splat ret <16 x i16> %shift } +define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer + %shift = ashr <32 x i16> %a, %splat + ret <32 x i16> %shift +} + define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8': ; SSE2: Found an estimated cost of 54 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer %shift = ashr <16 x i8> %a, %splat @@ -197,12 +308,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE41: Found an estimated cost of 48 for instruction: %shift ; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer %shift = ashr <32 x i8> %a, %splat ret <32 x i8> %shift } +define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': +; SSE2: Found an estimated cost of 216 for instruction: %shift +; SSE41: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX2: Found an estimated cost of 48 for instruction: %shift +; AVX512F: Found an estimated cost of 48 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %shift = ashr <64 x i8> %a, %splat + ret <64 x i8> %shift +} + ; ; Constant Shifts ; @@ -213,6 +339,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) { ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <2 x i64> %a, ret <2 x i64> %shift @@ -224,17 +351,31 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } +define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': +; SSE2: Found an estimated cost of 48 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <8 x i64> %a, + ret <8 x i64> %shift +} + define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <4 x i32> %a, @@ -247,18 +388,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i32> %a, ret <8 x i32> %shift } +define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': +; SSE2: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = ashr <16 x i32> %a, + ret <16 x i32> %shift +} + define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <8 x i16> %a, ret <8 x i16> %shift @@ -270,17 +426,32 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } +define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <32 x i16> %a, + ret <32 x i16> %shift +} + define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8': ; SSE2: Found an estimated cost of 54 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -292,11 +463,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; SSE41: Found an estimated cost of 48 for instruction: %shift ; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } +define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': +; SSE2: Found an estimated cost of 216 for instruction: %shift +; SSE41: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX2: Found an estimated cost of 48 for instruction: %shift +; AVX512F: Found an estimated cost of 48 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <64 x i8> %a, + ret <64 x i8> %shift +} + ; ; Uniform Constant Shifts ; @@ -307,6 +492,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <2 x i64> %a, ret <2 x i64> %shift @@ -318,17 +504,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } +define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <8 x i64> %a, + ret <8 x i64> %shift +} + define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <4 x i32> %a, @@ -341,18 +541,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i32> %a, ret <8 x i32> %shift } +define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = ashr <16 x i32> %a, + ret <16 x i32> %shift +} + define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <8 x i16> %a, ret <8 x i16> %shift @@ -364,17 +579,32 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } +define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <32 x i16> %a, + ret <32 x i16> %shift +} + define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -386,7 +616,21 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift +; AVX512: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } + +define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 48 for instruction: %shift +; AVX512F: Found an estimated cost of 48 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = ashr <64 x i8> %a, + ret <64 x i8> %shift +} diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 6d028268ea5..b3382253739 100644 --- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -1,9 +1,12 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW ; Verify the cost of vector logical shift right instructions. @@ -17,6 +20,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <2 x i64> %a, %b @@ -29,18 +33,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i64> %a, %b ret <4 x i64> %shift } +define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = lshr <8 x i64> %a, %b + ret <8 x i64> %shift +} + define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 11 for instruction: %shift ; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i32> %a, %b @@ -53,18 +72,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE41: Found an estimated cost of 22 for instruction: %shift ; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i32> %a, %b ret <8 x i32> %shift } +define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': +; SSE2: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = lshr <16 x i32> %a, %b + ret <16 x i32> %shift +} + define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i16> %a, %b ret <8 x i16> %shift @@ -76,17 +110,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, %b ret <16 x i16> %shift } +define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = lshr <32 x i16> %a, %b + ret <32 x i16> %shift +} + define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 12 for instruction: %shift +; AVX512: Found an estimated cost of 12 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, %b ret <16 x i8> %shift @@ -98,11 +147,25 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, %b ret <32 x i8> %shift } +define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': +; SSE2: Found an estimated cost of 104 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = lshr <64 x i8> %a, %b + ret <64 x i8> %shift +} + ; ; Uniform Variable Shifts ; @@ -113,6 +176,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -126,6 +190,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer @@ -133,12 +198,27 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %shift } +define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer + %shift = lshr <8 x i64> %a, %splat + ret <8 x i64> %shift +} + define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 11 for instruction: %shift ; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer @@ -152,6 +232,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE41: Found an estimated cost of 22 for instruction: %shift ; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer @@ -159,12 +240,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shift } +define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': +; SSE2: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer + %shift = lshr <16 x i32> %a, %splat + ret <16 x i32> %shift +} + define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer %shift = lshr <8 x i16> %a, %splat @@ -177,18 +273,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer %shift = lshr <16 x i16> %a, %splat ret <16 x i16> %shift } +define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer + %shift = lshr <32 x i16> %a, %splat + ret <32 x i16> %shift +} + define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 12 for instruction: %shift +; AVX512: Found an estimated cost of 12 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer %shift = lshr <16 x i8> %a, %splat @@ -201,12 +313,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer %shift = lshr <32 x i8> %a, %splat ret <32 x i8> %shift } +define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': +; SSE2: Found an estimated cost of 104 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %shift = lshr <64 x i8> %a, %splat + ret <64 x i8> %shift +} + ; ; Constant Shifts ; @@ -217,6 +344,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <2 x i64> %a, @@ -229,18 +357,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i64> %a, ret <4 x i64> %shift } +define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = lshr <8 x i64> %a, + ret <8 x i64> %shift +} + define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 11 for instruction: %shift ; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i32> %a, @@ -253,18 +396,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; SSE41: Found an estimated cost of 22 for instruction: %shift ; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i32> %a, ret <8 x i32> %shift } +define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': +; SSE2: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = lshr <16 x i32> %a, + ret <16 x i32> %shift +} + define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i16> %a, ret <8 x i16> %shift @@ -276,17 +434,32 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } +define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = lshr <32 x i16> %a, + ret <32 x i16> %shift +} + define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift ; SSE41: Found an estimated cost of 12 for instruction: %shift ; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 12 for instruction: %shift +; AVX512: Found an estimated cost of 12 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, ret <16 x i8> %shift @@ -298,11 +471,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; SSE41: Found an estimated cost of 24 for instruction: %shift ; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } +define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': +; SSE2: Found an estimated cost of 104 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = lshr <64 x i8> %a, + ret <64 x i8> %shift +} + ; ; Uniform Constant Shifts ; @@ -313,6 +500,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <2 x i64> %a, @@ -325,18 +513,33 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i64> %a, ret <4 x i64> %shift } +define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = lshr <8 x i64> %a, + ret <8 x i64> %shift +} + define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i32> %a, @@ -349,18 +552,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i32> %a, ret <8 x i32> %shift } +define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = lshr <16 x i32> %a, + ret <16 x i32> %shift +} + define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i16> %a, ret <8 x i16> %shift @@ -372,17 +590,32 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } +define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = lshr <32 x i16> %a, + ret <32 x i16> %shift +} + define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, ret <16 x i8> %shift @@ -394,7 +627,21 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } + +define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift + %shift = lshr <64 x i8> %a, + ret <64 x i8> %shift +} diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 60ba3adea42..804c5a76c31 100644 --- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -1,9 +1,12 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW ; Verify the cost of vector shift left instructions. @@ -18,6 +21,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <2 x i64> %a, %b @@ -30,18 +34,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i64> %a, %b ret <4 x i64> %shift } +define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <8 x i64> %a, %b + ret <8 x i64> %shift +} + define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32': ; SSE2: Found an estimated cost of 10 for instruction: %shift ; SSE41: Found an estimated cost of 10 for instruction: %shift ; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i32> %a, %b @@ -54,18 +73,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE41: Found an estimated cost of 20 for instruction: %shift ; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i32> %a, %b ret <8 x i32> %shift } +define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': +; SSE2: Found an estimated cost of 40 for instruction: %shift +; SSE41: Found an estimated cost of 40 for instruction: %shift +; AVX: Found an estimated cost of 40 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <16 x i32> %a, %b + ret <16 x i32> %shift +} + define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i16> %a, %b ret <8 x i16> %shift @@ -77,17 +111,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i16> %a, %b ret <16 x i16> %shift } +define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift + %shift = shl <32 x i16> %a, %b + ret <32 x i16> %shift +} + define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift ; SSE41: Found an estimated cost of 11 for instruction: %shift ; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, %b ret <16 x i8> %shift @@ -99,11 +148,25 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE41: Found an estimated cost of 22 for instruction: %shift ; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, %b ret <32 x i8> %shift } +define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': +; SSE2: Found an estimated cost of 104 for instruction: %shift +; SSE41: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift + %shift = shl <64 x i8> %a, %b + ret <64 x i8> %shift +} + ; ; Uniform Variable Shifts ; @@ -114,6 +177,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -127,6 +191,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer @@ -134,12 +199,27 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %shift } +define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer + %shift = shl <8 x i64> %a, %splat + ret <8 x i64> %shift +} + define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32': ; SSE2: Found an estimated cost of 10 for instruction: %shift ; SSE41: Found an estimated cost of 10 for instruction: %shift ; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer @@ -153,6 +233,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE41: Found an estimated cost of 20 for instruction: %shift ; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer @@ -160,12 +241,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shift } +define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': +; SSE2: Found an estimated cost of 40 for instruction: %shift +; SSE41: Found an estimated cost of 40 for instruction: %shift +; AVX: Found an estimated cost of 40 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer + %shift = shl <16 x i32> %a, %splat + ret <16 x i32> %shift +} + define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 14 for instruction: %shift ; AVX: Found an estimated cost of 14 for instruction: %shift ; AVX2: Found an estimated cost of 14 for instruction: %shift +; AVX512: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer %shift = shl <8 x i16> %a, %splat @@ -178,18 +274,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE41: Found an estimated cost of 28 for instruction: %shift ; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift +; AVX512: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer %shift = shl <16 x i16> %a, %splat ret <16 x i16> %shift } +define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16': +; SSE2: Found an estimated cost of 128 for instruction: %shift +; SSE41: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX2: Found an estimated cost of 20 for instruction: %shift +; AVX512F: Found an estimated cost of 20 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift + %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer + %shift = shl <32 x i16> %a, %splat + ret <32 x i16> %shift +} + define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift ; SSE41: Found an estimated cost of 11 for instruction: %shift ; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer %shift = shl <16 x i8> %a, %splat @@ -202,12 +314,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE41: Found an estimated cost of 22 for instruction: %shift ; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer %shift = shl <32 x i8> %a, %splat ret <32 x i8> %shift } +define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': +; SSE2: Found an estimated cost of 104 for instruction: %shift +; SSE41: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift + %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %shift = shl <64 x i8> %a, %splat + ret <64 x i8> %shift +} + ; ; Constant Shifts ; @@ -218,6 +345,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) { ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <2 x i64> %a, @@ -230,18 +358,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i64> %a, ret <4 x i64> %shift } +define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': +; SSE2: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <8 x i64> %a, + ret <8 x i64> %shift +} + define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32': ; SSE2: Found an estimated cost of 6 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i32> %a, @@ -254,18 +397,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i32> %a, ret <8 x i32> %shift } +define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': +; SSE2: Found an estimated cost of 24 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <16 x i32> %a, + ret <16 x i32> %shift +} + define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i16> %a, ret <8 x i16> %shift @@ -277,18 +435,34 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i16> %a, ret <16 x i16> %shift } +define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <32 x i16> %a, + ret <32 x i16> %shift +} + define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift ; SSE41: Found an estimated cost of 11 for instruction: %shift ; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, ret <16 x i8> %shift @@ -300,11 +474,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; SSE41: Found an estimated cost of 22 for instruction: %shift ; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, ret <32 x i8> %shift } +define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': +; SSE2: Found an estimated cost of 104 for instruction: %shift +; SSE41: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift + %shift = shl <64 x i8> %a, + ret <64 x i8> %shift +} + ; ; Uniform Constant Shifts ; @@ -315,6 +503,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <2 x i64> %a, @@ -327,18 +516,33 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i64> %a, ret <4 x i64> %shift } +define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <8 x i64> %a, + ret <8 x i64> %shift +} + define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i32> %a, @@ -351,18 +555,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i32> %a, ret <8 x i32> %shift } +define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <16 x i32> %a, + ret <16 x i32> %shift +} + define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i16> %a, ret <8 x i16> %shift @@ -374,18 +593,34 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i16> %a, ret <16 x i16> %shift } +define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift + %shift = shl <32 x i16> %a, + ret <32 x i16> %shift +} + define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': ; SSE2: Found an estimated cost of 1 for instruction: %shift ; SSE41: Found an estimated cost of 1 for instruction: %shift ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, ret <16 x i8> %shift @@ -397,11 +632,25 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift +; AVX512: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, ret <32 x i8> %shift } +define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 22 for instruction: %shift +; AVX512F: Found an estimated cost of 22 for instruction: %shift +; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift + %shift = shl <64 x i8> %a, + ret <64 x i8> %shift +} + ; ; Special Cases ;