From 3e2e2b550a5946c7b171491f56e735d7a4ccb49d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 18 Nov 2016 06:04:33 +0000 Subject: [PATCH] [InstCombine][AVX-512] Teach InstCombineCalls how to handle the intrinsics for variable shift with 16-bit elements. This is a straightforward extension of the existing support for 32/64-bit element types. Just needed to add the additional intrinsics to the switches. llvm-svn: 287316 --- .../InstCombine/InstCombineCalls.cpp | 18 + .../InstCombine/x86-vector-shifts.ll | 384 ++++++++++++++++++ 2 files changed, 402 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d5abbea6e0a..c9eecf3ea77 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -399,6 +399,9 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, case Intrinsic::x86_avx512_psrav_q_256: case Intrinsic::x86_avx512_psrav_d_512: case Intrinsic::x86_avx512_psrav_q_512: + case Intrinsic::x86_avx512_psrav_w_128: + case Intrinsic::x86_avx512_psrav_w_256: + case Intrinsic::x86_avx512_psrav_w_512: LogicalShift = false; ShiftLeft = false; break; @@ -408,6 +411,9 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, case Intrinsic::x86_avx2_psrlv_q_256: case Intrinsic::x86_avx512_psrlv_d_512: case Intrinsic::x86_avx512_psrlv_q_512: + case Intrinsic::x86_avx512_psrlv_w_128: + case Intrinsic::x86_avx512_psrlv_w_256: + case Intrinsic::x86_avx512_psrlv_w_512: LogicalShift = true; ShiftLeft = false; break; @@ -417,6 +423,9 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, case Intrinsic::x86_avx2_psllv_q_256: case Intrinsic::x86_avx512_psllv_d_512: case Intrinsic::x86_avx512_psllv_q_512: + case Intrinsic::x86_avx512_psllv_w_128: + case Intrinsic::x86_avx512_psllv_w_256: + case Intrinsic::x86_avx512_psllv_w_512: LogicalShift = true; ShiftLeft = true; break; @@ -1873,18 +1882,27 @@ Instruction 
*InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_psllv_q_256: case Intrinsic::x86_avx512_psllv_d_512: case Intrinsic::x86_avx512_psllv_q_512: + case Intrinsic::x86_avx512_psllv_w_128: + case Intrinsic::x86_avx512_psllv_w_256: + case Intrinsic::x86_avx512_psllv_w_512: case Intrinsic::x86_avx2_psrav_d: case Intrinsic::x86_avx2_psrav_d_256: case Intrinsic::x86_avx512_psrav_q_128: case Intrinsic::x86_avx512_psrav_q_256: case Intrinsic::x86_avx512_psrav_d_512: case Intrinsic::x86_avx512_psrav_q_512: + case Intrinsic::x86_avx512_psrav_w_128: + case Intrinsic::x86_avx512_psrav_w_256: + case Intrinsic::x86_avx512_psrav_w_512: case Intrinsic::x86_avx2_psrlv_d: case Intrinsic::x86_avx2_psrlv_d_256: case Intrinsic::x86_avx2_psrlv_q: case Intrinsic::x86_avx2_psrlv_q_256: case Intrinsic::x86_avx512_psrlv_d_512: case Intrinsic::x86_avx512_psrlv_q_512: + case Intrinsic::x86_avx512_psrlv_w_128: + case Intrinsic::x86_avx512_psrlv_w_256: + case Intrinsic::x86_avx512_psrlv_w_512: if (Value *V = simplifyX86varShift(*II, *Builder)) return replaceInstUsesWith(*II, V); break; diff --git a/test/Transforms/InstCombine/x86-vector-shifts.ll b/test/Transforms/InstCombine/x86-vector-shifts.ll index eac7fd71f23..07934fbdfe7 100644 --- a/test/Transforms/InstCombine/x86-vector-shifts.ll +++ b/test/Transforms/InstCombine/x86-vector-shifts.ll @@ -1762,6 +1762,116 @@ define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) { ret <8 x i64> %2 } +define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_128_0( +; CHECK-NEXT: ret <8 x i16> %v +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_128_var( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + 
+define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_128_allbig( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_128_undef( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = insertelement <8 x i16> , i16 undef, i64 0 + %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1) + ret <8 x i16> %2 +} + +define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_256_0( +; CHECK-NEXT: ret <16 x i16> %v +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_256_var( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_256_allbig( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_256_undef( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = insertelement <16 x i16> , i16 undef, i64 0 + %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1) + ret <16 x i16> %2 +} + +define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_512_0( +; CHECK-NEXT: 
ret <32 x i16> %v +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_512_var( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_512_allbig( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrav_w_512_undef( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = insertelement <32 x i16> , i16 undef, i64 0 + %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1) + ret <32 x i16> %2 +} + ; ; LSHR - Constant Per-Element Vector ; @@ -2030,6 +2140,138 @@ define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) { ret <8 x i64> %2 } +define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_128_0( +; CHECK-NEXT: ret <8 x i16> %v +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_128_var( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_128_big( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, 
<8 x i16> ) +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_128_allbig( +; CHECK-NEXT: ret <8 x i16> +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_128_undef( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = insertelement <8 x i16> , i16 undef, i64 0 + %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1) + ret <8 x i16> %2 +} + +define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_256_0( +; CHECK-NEXT: ret <16 x i16> %v +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_256_var( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_256_big( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_256_allbig( +; CHECK-NEXT: ret <16 x i16> +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) { +; 
CHECK-LABEL: @avx512_psrlv_w_256_undef( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = insertelement <16 x i16> , i16 undef, i64 0 + %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1) + ret <16 x i16> %2 +} + +define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_512_0( +; CHECK-NEXT: ret <32 x i16> %v +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_512_var( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_512_big( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_512_allbig( +; CHECK-NEXT: ret <32 x i16> +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psrlv_w_512_undef( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = insertelement <32 x i16> , i16 undef, i64 0 + %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1) + ret <32 x i16> %2 +} + ; ; SHL - Constant Per-Element Vector ; @@ -2298,6 +2540,138 @@ define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) { ret <8 x i64> %2 } +define <8 x i16> 
@avx512_psllv_w_128_0(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_128_0( +; CHECK-NEXT: ret <8 x i16> %v +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_128_var( +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_128_big( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_128_allbig( +; CHECK-NEXT: ret <8 x i16> +; + %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_128_undef( +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; + %1 = insertelement <8 x i16> , i16 undef, i64 0 + %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1) + ret <8 x i16> %2 +} + +define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_256_0( +; CHECK-NEXT: ret <16 x i16> %v +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_256_var( +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x 
i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_256_big( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> ) +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_256_allbig( +; CHECK-NEXT: ret <16 x i16> +; + %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_256_undef( +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; + %1 = insertelement <16 x i16> , i16 undef, i64 0 + %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1) + ret <16 x i16> %2 +} + +define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_512_0( +; CHECK-NEXT: ret <32 x i16> %v +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_512_var( +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_512_big( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) { +; 
CHECK-LABEL: @avx512_psllv_w_512_allbig( +; CHECK-NEXT: ret <32 x i16> +; + %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) { +; CHECK-LABEL: @avx512_psllv_w_512_undef( +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> %v, +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = insertelement <32 x i16> , i16 undef, i64 0 + %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1) + ret <32 x i16> %2 +} + ; ; Vector Demanded Bits ; @@ -3047,4 +3421,14 @@ declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1 declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1 declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1 +declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1 +declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1 +declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1 +declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1 +declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1 +declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1 +declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1 +declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1 +declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1 + attributes #1 = { nounwind readnone }