mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[x86] make sure horizontal op and broadcast types match to simplify (PR41414)
If the horizontal-op type and the broadcast (shuffle) type don't match, we can't just remove the shuffle. There may be some other opportunity for optimization here, but this change should prevent the crash reported in https://bugs.llvm.org/show_bug.cgi?id=41414 (llvm-svn: 359095).
This commit is contained in:
parent
ca782a7e4a
commit
6aa0bf68a0
@ -32820,10 +32820,13 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
|
||||
|
||||
// For a broadcast, peek through an extract element of index 0 to find the
|
||||
// horizontal op: broadcast (ext_vec_elt HOp, 0)
|
||||
EVT VT = N->getValueType(0);
|
||||
if (Opcode == X86ISD::VBROADCAST) {
|
||||
SDValue SrcOp = N->getOperand(0);
|
||||
if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
|
||||
SrcOp.getValueType() == MVT::f64 &&
|
||||
SrcOp.getOperand(0).getValueType() == VT &&
|
||||
isNullConstant(SrcOp.getOperand(1)))
|
||||
N = SrcOp.getNode();
|
||||
}
|
||||
|
||||
@ -32847,7 +32850,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
|
||||
// movddup (hadd X, X) --> hadd X, X
|
||||
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
|
||||
assert((HOp.getValueType() == MVT::v2f64 ||
|
||||
HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
|
||||
HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
|
||||
"Unexpected type for h-op");
|
||||
return HOp;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
|
||||
|
||||
define float @pr26491(<4 x float> %a0) {
|
||||
; SSE2-LABEL: pr26491:
|
||||
@ -37,3 +38,66 @@ define float @pr26491(<4 x float> %a0) {
|
||||
%5 = fadd float %3, %4
|
||||
ret float %5
|
||||
}
|
||||
|
||||
; When simplifying away a splat (broadcast), the horizontal-op type must match the shuffle type.
|
||||
|
||||
define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
|
||||
; SSE2-LABEL: PR41414:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movq %rdi, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
|
||||
; SSE2-NEXT: subpd {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: movapd %xmm2, %xmm3
|
||||
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
|
||||
; SSE2-NEXT: addpd %xmm2, %xmm3
|
||||
; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
|
||||
; SSE2-NEXT: divpd %xmm3, %xmm1
|
||||
; SSE2-NEXT: divpd %xmm3, %xmm0
|
||||
; SSE2-NEXT: xorpd %xmm2, %xmm2
|
||||
; SSE2-NEXT: addpd %xmm2, %xmm0
|
||||
; SSE2-NEXT: addpd %xmm2, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: PR41414:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movq %rdi, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
|
||||
; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2
|
||||
; SSSE3-NEXT: haddpd %xmm2, %xmm2
|
||||
; SSSE3-NEXT: divpd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: divpd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: xorpd %xmm2, %xmm2
|
||||
; SSSE3-NEXT: addpd %xmm2, %xmm0
|
||||
; SSSE3-NEXT: addpd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: PR41414:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovq %rdi, %xmm1
|
||||
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
|
||||
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: PR41414:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq %rdi, %xmm1
|
||||
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
|
||||
; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
|
||||
; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%conv = uitofp i64 %x to double
|
||||
%t0 = insertelement <4 x double> undef, double %conv, i32 0
|
||||
%t1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
%t2 = fdiv <4 x double> %y, %t1
|
||||
%t3 = fadd <4 x double> zeroinitializer, %t2
|
||||
ret <4 x double> %t3
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user