
[X86][SSE] Match bitcasted v4i32 BUILD_VECTORS for v2i64 shifts on 64-bit targets (PR34855)

We were already doing this for 32-bit targets, but we can generate these on 64-bit targets as well.

llvm-svn: 315155
Simon Pilgrim 2017-10-07 17:42:17 +00:00
parent 09f6a394af
commit 626cb4b4a5
2 changed files with 40 additions and 3 deletions
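
For illustration only (nothing below is part of the commit): a minimal, hypothetical IR sketch of the shape this lowering looks for, i.e. a <2 x i64> shift whose per-lane amount is a bitcast of a <4 x i32> build vector with the high half of each 64-bit lane zeroed. The function and value names are invented, and the exact code generated depends on how the DAG is built.

define <2 x i64> @shift_by_bitcast_v4i32(<2 x i64> %x, i32 %amt) {
  ; Hypothetical example: build <4 x i32> <amt, 0, amt, 0> so that, reinterpreted
  ; as <2 x i64>, each 64-bit lane holds the 32-bit shift amount zero-extended.
  %v0 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 0>, i32 %amt, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %amt, i32 2
  %bc = bitcast <4 x i32> %v1 to <2 x i64>
  ; Shift each 64-bit lane by the amount carried in its low 32 bits.
  %shr = lshr <2 x i64> %x, %bc
  ret <2 x i64> %shr
}

Before this patch only 32-bit targets peeked through the bitcast to the BUILD_VECTOR here; with the check relaxed, 64-bit targets take the same path, which the X64 check lines in the new test below reflect.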


@@ -22279,9 +22279,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
     }
   }
 
-  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
-  if (!Subtarget.is64Bit() && VT == MVT::v2i64 &&
-      Amt.getOpcode() == ISD::BITCAST &&
+  // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
+  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
       Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
     Amt = Amt.getOperand(0);
     unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /


@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+
+define void @PR34855(<2 x i32> *%p0, <2 x i32> *%p1, <2 x i32> *%p2) {
+; X86-LABEL: PR34855:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    movlps %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: PR34855:
+; X64:       # BB#0:
+; X64-NEXT:    movslq 4(%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movslq (%rdi), %rax
+; X64-NEXT:    movq %rax, %xmm1
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT:    pxor %xmm0, %xmm0
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X64-NEXT:    psrlq %xmm0, %xmm2
+; X64-NEXT:    psrlq %xmm0, %xmm1
+; X64-NEXT:    pxor %xmm2, %xmm1
+; X64-NEXT:    psubq %xmm2, %xmm1
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X64-NEXT:    movq %xmm0, (%rdx)
+; X64-NEXT:    retq
+  %tmp = load <2 x i32>, <2 x i32>* %p0, align 8
+  %tmp1 = load <2 x i32>, <2 x i32>* %p1, align 8
+  %mul = mul <2 x i32> zeroinitializer, %tmp1
+  %mul1 = mul <2 x i32> <i32 -8190, i32 -8190>, %mul
+  %mul2 = mul <2 x i32> <i32 3, i32 3>, %mul1
+  %shr = ashr <2 x i32> %tmp, %mul2
+  store <2 x i32> %shr, <2 x i32>* %p2, align 8
+  ret void
+}