diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e2f5807aa55..6e45621bd96 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22279,9 +22279,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
     }
   }
 
-  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
-  if (!Subtarget.is64Bit() && VT == MVT::v2i64 &&
-      Amt.getOpcode() == ISD::BITCAST &&
+  // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
+  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
       Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
     Amt = Amt.getOperand(0);
     unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
diff --git a/test/CodeGen/X86/pr34855.ll b/test/CodeGen/X86/pr34855.ll
new file mode 100644
index 00000000000..286d7d2fe84
--- /dev/null
+++ b/test/CodeGen/X86/pr34855.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+
+define void @PR34855(<2 x i32> *%p0, <2 x i32> *%p1, <2 x i32> *%p2) {
+; X86-LABEL: PR34855:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    movlps %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: PR34855:
+; X64:       # BB#0:
+; X64-NEXT:    movslq 4(%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movslq (%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm1
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT:    pxor %xmm0, %xmm0
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X64-NEXT:    psrlq %xmm0, %xmm2
+; X64-NEXT:    psrlq %xmm0, %xmm1
+; X64-NEXT:    pxor %xmm2, %xmm1
+; X64-NEXT:    psubq %xmm2, %xmm1
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X64-NEXT:    movq %xmm0, (%rdx)
+; X64-NEXT:    retq
+  %tmp = load <2 x i32>, <2 x i32>* %p0, align 8
+  %tmp1 = load <2 x i32>, <2 x i32>* %p1, align 8
+  %mul = mul <2 x i32> zeroinitializer, %tmp1
+  %mul1 = mul <2 x i32> , %mul
+  %mul2 = mul <2 x i32> , %mul1
+  %shr = ashr <2 x i32> %tmp, %mul2
+  store <2 x i32> %shr, <2 x i32>* %p2, align 8
+  ret void
+}
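
The functional change is dropping the !Subtarget.is64Bit() guard: a v2i64 shift
amount can arrive as a bitcast of a BUILD_VECTOR on 64-bit targets too (see the
X64 codegen in the new test), not only when 32-bit legalization expands an i64
shift amount into high and low parts. A minimal sketch of the post-patch match,
assuming VT and Amt are the shift's value type and amount operand as in
LowerScalarVariableShift, and assuming the Ratio expression (cut off at the end
of the hunk) divides by VT.getVectorNumElements():

  // Sketch only: the guard after this patch, rest of the function elided.
  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    Amt = Amt.getOperand(0);
    // Each logical i64 amount spans Ratio consecutive narrower elements of
    // the BUILD_VECTOR, e.g. a v4i32 amount for a v2i64 shift gives
    // Ratio == 2 (low and high i32 halves per i64 element).
    unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
                     VT.getVectorNumElements(); // divisor assumed, see above
  }

With the guard relaxed, the test's shift amount (a chain of multiplies by zero)
folds to a splat of 0 on both targets: the X86 output becomes a straight
movsd/movlps copy from %p0 to %p2, while X64 still emits the psrlq-based
arithmetic-shift sequence with a zero amount.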