From 3a9c51f2561fb5a8a27b560c3fa61cec8f334824 Mon Sep 17 00:00:00 2001
From: Nate Begeman
Date: Thu, 3 Dec 2009 07:11:29 +0000
Subject: [PATCH] Don't pull vector sext through both hands of a logical
 operation, since doing so prevents the fusion of vector sext and setcc into
 vsetcc.

Add a testcase for the above transformation.

Fix a bogus use of APInt noticed while tracking this down.

llvm-svn: 90423
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++++--
 test/CodeGen/X86/vec_compare-2.ll        | 29 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/X86/vec_compare-2.ll

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 64331b0fe26..204cbc4dc14 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1688,10 +1688,14 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  //
+  // do not sink logical op inside of a vector extend, since it may combine
+  // into a vsetcc.
   if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
        N0.getOpcode() == ISD::SIGN_EXTEND ||
        (N0.getOpcode() == ISD::TRUNCATE &&
         !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+      !VT.isVector() &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
       (!LegalOperations ||
        TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
@@ -1944,8 +1948,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   }
 
   // fold (or x, undef) -> -1
-  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
-    return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  }
   // fold (or c1, c2) -> c1|c2
   if (N0C && N1C)
     return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
new file mode 100644
index 00000000000..091641b3bc3
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
+entry:
+; CHECK-NOT: set
+; CHECK: pcmpgt
+; CHECK: blendvps
+  %shr.i = ashr <4 x i32> zeroinitializer, ; <<4 x i32>> [#uses=1]
+  %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+  %cmp323.x = icmp slt <4 x i32> zeroinitializer, %sub322.i ; <<4 x i1>> [#uses=1]
+  %cmp323.i = sext <4 x i1> %cmp323.x to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %or.i = or <4 x i32> %cmp318.i, %cmp323.i ; <<4 x i32>> [#uses=1]
+  %tmp10.i83.i = bitcast <4 x i32> %or.i to <4 x float> ; <<4 x float>> [#uses=1]
+  %0 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> undef, <4 x float> undef, <4 x float> %tmp10.i83.i) nounwind ; <<4 x float>> [#uses=1]
+  %conv.i.i15.i = bitcast <4 x float> %0 to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %swz.i.i28.i = shufflevector <4 x i32> %conv.i.i15.i, <4 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1]
+  %tmp6.i29.i = bitcast <2 x i32> %swz.i.i28.i to <4 x i16> ; <<4 x i16>> [#uses=1]
+  %swz.i30.i = shufflevector <4 x i16> %tmp6.i29.i, <4 x i16> undef, <2 x i32> ; <<2 x i16>> [#uses=1]
+  store <2 x i16> %swz.i30.i, <2 x i16>* undef
+  unreachable
+  ret void
+}