[DAGCombiner] foldShuffleOfConcatUndefs: treat mask elements that select the
upper (undef) subvector of a concat operand as undef.

Inside the mask-rewriting loop, a mask element whose index (taken modulo
NumElts) is >= HalfNumElts is now skipped in the same way as an explicit -1
element, before the remapped index M is computed for it.

Also adds the regression test test/CodeGen/X86/pr50609.ll, which compiles a
function issuing two AVX masked stores of a splatted value for
-mcpu=sandybridge and checks the generated assembly.

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 52de05117b1..22bd89990b1 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20427,6 +20427,9 @@ static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
   for (unsigned i = 0; i != NumElts; ++i) {
     if (Mask[i] == -1)
       continue;
+    // If we reference the upper (undef) subvector then the element is undef.
+    if ((Mask[i] % NumElts) >= HalfNumElts)
+      continue;
     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
     if (i < HalfNumElts)
       Mask0[i] = M;
diff --git a/test/CodeGen/X86/pr50609.ll b/test/CodeGen/X86/pr50609.ll
new file mode 100644
index 00000000000..44e004825e7
--- /dev/null
+++ b/test/CodeGen/X86/pr50609.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s
+
+define void @PR50609(float* noalias nocapture %RET, float* noalias %aFOO, <16 x i32> %__mask) nounwind {
+; CHECK-LABEL: PR50609:
+; CHECK:       # %bb.0: # %allocas
+; CHECK-NEXT:    leal 40(%rsi), %eax
+; CHECK-NEXT:    vmovq %rsi, %xmm2
+; CHECK-NEXT:    vmovd %eax, %xmm3
+; CHECK-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
+; CHECK-NEXT:    vpsrad $31, %xmm2, %xmm3
+; CHECK-NEXT:    vpsrld $30, %xmm3, %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
+; CHECK-NEXT:    vpsrad $2, %xmm2, %xmm2
+; CHECK-NEXT:    vcvtdq2ps %ymm2, %ymm2
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; CHECK-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
+; CHECK-NEXT:    vmaskmovps %ymm2, %ymm1, 32(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+allocas:
+  %aFOO_load_ptr2int = ptrtoint float* %aFOO to i64
+  %aFOO_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
+  %aFOO_load4_offset = getelementptr float, float* %aFOO, i64 10
+  %c_load_ptr2int = ptrtoint float* %aFOO_load4_offset to i64
+  %c_load_ptr2int_broadcast = insertelement <16 x i64> undef, i64 %c_load_ptr2int, i32 0
+  %0 = sub <16 x i64> %c_load_ptr2int_broadcast, %aFOO_load_ptr2int_broadcast
+  %1 = trunc <16 x i64> %0 to <16 x i32>
+  %2 = sdiv <16 x i32> %1, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  %3 = sitofp <16 x i32> %2 to <16 x float>
+  %ptr.i.i = bitcast float* %RET to i8*
+  %val0.i.i = shufflevector <16 x float> %3, <16 x float> undef, <8 x i32> zeroinitializer
+  %mask0.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %mask1.i.i = shufflevector <16 x i32> %__mask, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr.i.i, <8 x i32> %mask0.i.i, <8 x float> %val0.i.i) #1
+  %ptr1.i.i16 = getelementptr float, float* %RET, i64 8
+  %ptr1.i.i = bitcast float* %ptr1.i.i16 to i8*
+  call void @llvm.x86.avx.maskstore.ps.256(i8* %ptr1.i.i, <8 x i32> %mask1.i.i, <8 x float> %val0.i.i) #1
+  ret void
+}
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)