From 448249fd73a26c0b2820fccf86c45f392855ce09 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Tue, 18 Feb 2014 11:15:32 +0000
Subject: [PATCH] X86: use vpsllvd (& friends) for 16-bit shifts on Haswell

Variable v8i16 shifts on AVX2 targets are lowered by extending both operands
to v8i32, shifting there, and truncating the result back to v8i16. The shift
amount is zero-extended so that its upper bits cannot change the count.

llvm-svn: 201558
---
 lib/Target/X86/X86ISelLowering.cpp | 14 +++++++++++++
 test/CodeGen/X86/avx2-shift.ll     | 33 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e35aa8fa208..b1d734e932b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13161,6 +13161,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
     Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
     return DAG.getNode(ISD::MUL, dl, VT, Op, R);
   }
+
   if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
     assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
 
@@ -13204,6 +13205,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
     return R;
   }
 
+  // With AVX2, widen v8i16 shifts to v8i32 (vpsllvd & friends) and truncate.
+  // The amount is zero-extended so its upper bits cannot corrupt the count.
+  // v16i8 is excluded: its extension overhead makes the SSE lowering better.
+  if (Subtarget->hasInt256() && VT == MVT::v8i16) {
+    MVT NewVT = MVT::v8i32;
+    unsigned ExtOpc =
+        Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    R = DAG.getNode(ExtOpc, dl, NewVT, R);
+    Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Amt);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                       DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
+  }
+
   // Decompose 256-bit shifts into smaller 128-bit shifts.
   if (VT.is256BitVector()) {
     unsigned NumElems = VT.getVectorNumElements();
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index 7fdbaaa39cb..025d52ede0f 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -266,3 +266,36 @@ define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
   %c = sext <8 x i16> %b to <8 x i32>
   ret <8 x i32> %c
 }
+
+define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: variable_shl16:
+; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
+; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
+; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
+; CHECK: vpshufb
+; CHECK: vpermq
+  %res = shl <8 x i16> %lhs, %rhs
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: variable_ashr16:
+; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
+; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
+; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
+; CHECK: vpshufb
+; CHECK: vpermq
+  %res = ashr <8 x i16> %lhs, %rhs
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: variable_lshr16:
+; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
+; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
+; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
+; CHECK: vpshufb
+; CHECK: vpermq
+  %res = lshr <8 x i16> %lhs, %rhs
+  ret <8 x i16> %res
+}
\ No newline at end of file