From c32d634c5e4b595c3f3b231b867e6ee9f02f64eb Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 May 2018 21:58:44 +0000 Subject: [PATCH] [InstCombine] refine select-of-constants to bitwise ops Add logic for the special case when a cmp+select can clearly be reduced to just a bitwise logic instruction, and remove an over-reaching chunk of general purpose bit magic. The primary goal is to remove cases where we are not improving the IR instruction count when doing these select transforms, and in all cases here that is true. In the motivating 3-way compare tests, there are further improvements because we can combine/propagate select values (not sure if that belongs in instcombine, but it's there for now). DAGCombiner has folds to turn some of these selects into bit magic, so there should be no difference in the end result in those cases. Not all constant combinations are handled there yet, however, so it is possible that some targets will see more cmov/csel codegen with this change in IR canonicalization. Ideally, we'll go further to *not* turn selects into multiple logic/math ops in instcombine, and we'll canonicalize to selects. But we should make sure that this step does not result in regressions first (and if it does, we should fix those in the backend). 
The general direction for this change was discussed here: http://lists.llvm.org/pipermail/llvm-dev/2016-September/105373.html http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html Alive proofs for the new bit magic: https://rise4fun.com/Alive/XG7 Differential Revision: https://reviews.llvm.org/D46086 llvm-svn: 331486 --- .../InstCombine/InstCombineSelect.cpp | 91 +++----- test/Transforms/InstCombine/rem.ll | 11 +- .../Transforms/InstCombine/select-icmp-and.ll | 198 ++++++++---------- .../unrecognized_three-way-comparison.ll | 58 ++--- 4 files changed, 144 insertions(+), 214 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index affeb740f27..b4db091a439 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -100,23 +100,41 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, return nullptr; } - // If both select arms are non-zero see if we have a select of the form - // 'x ? 2^n + TC : FC'. Then we can offset both arms by C, use the logic - // for 'x ? 2^n : 0' and fix the thing up at the end. + // In general, when both constants are non-zero, we would need an offset to + // replace the select. This would require more instructions than we started + // with. But there's one special-case that we handle here because it can + // simplify/reduce the instructions. APInt TC = *SelTC; APInt FC = *SelFC; - APInt Offset(TC.getBitWidth(), 0); if (!TC.isNullValue() && !FC.isNullValue()) { - if ((TC - FC).isPowerOf2()) - Offset = FC; - else if ((FC - TC).isPowerOf2()) - Offset = TC; - else + // If the select constants differ by exactly one bit and that's the same + // bit that is masked and checked by the select condition, the select can + // be replaced by bitwise logic to set/clear one bit of the constant result. 
+ if (TC.getBitWidth() != AndMask.getBitWidth() || (TC ^ FC) != AndMask) return nullptr; - - // Adjust TC and FC by the offset. - TC -= Offset; - FC -= Offset; + if (CreateAnd) { + // If we have to create an 'and', then we must kill the cmp to not + // increase the instruction count. + if (!Cmp->hasOneUse()) + return nullptr; + V = Builder.CreateAnd(V, ConstantInt::get(SelType, AndMask)); + } + bool ExtraBitInTC = TC.ugt(FC); + if (Pred == ICmpInst::ICMP_EQ) { + // If the masked bit in V is clear, clear or set the bit in the result: + // (V & AndMaskC) == 0 ? TC : FC --> (V & AndMaskC) ^ TC + // (V & AndMaskC) == 0 ? TC : FC --> (V & AndMaskC) | TC + Constant *C = ConstantInt::get(SelType, TC); + return ExtraBitInTC ? Builder.CreateXor(V, C) : Builder.CreateOr(V, C); + } + if (Pred == ICmpInst::ICMP_NE) { + // If the masked bit in V is set, set or clear the bit in the result: + // (V & AndMaskC) != 0 ? TC : FC --> (V & AndMaskC) | FC + // (V & AndMaskC) != 0 ? TC : FC --> (V & AndMaskC) ^ FC + Constant *C = ConstantInt::get(SelType, FC); + return ExtraBitInTC ? Builder.CreateOr(V, C) : Builder.CreateXor(V, C); + } + llvm_unreachable("Only expecting equality predicates"); } // Make sure one of the select arms is a power-of-2. @@ -152,9 +170,6 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, if (ShouldNotVal) V = Builder.CreateXor(V, ValC); - // Apply an offset if needed. - if (!Offset.isNullValue()) - V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); return V; } @@ -790,51 +805,13 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, bool Changed = adjustMinMax(SI, *ICI); - ICmpInst::Predicate Pred = ICI->getPredicate(); - Value *CmpLHS = ICI->getOperand(0); - Value *CmpRHS = ICI->getOperand(1); - - // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 - // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1 - // FIXME: Type and constness constraints could be lifted, but we have to - // watch code size carefully.
We should consider xor instead of - // sub/add when we decide to do that. - // TODO: Merge this with foldSelectICmpAnd somehow. - if (CmpLHS->getType()->isIntOrIntVectorTy() && - CmpLHS->getType() == TrueVal->getType()) { - const APInt *C1, *C2; - if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) { - ICmpInst::Predicate Pred = ICI->getPredicate(); - Value *X; - APInt Mask; - if (decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask, false)) { - if (Mask.isSignMask()) { - assert(X == CmpLHS && "Expected to use the compare input directly"); - assert(ICmpInst::isEquality(Pred) && "Expected equality predicate"); - - if (Pred == ICmpInst::ICMP_NE) - std::swap(C1, C2); - - // This shift results in either -1 or 0. - Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1); - - // Check if we can express the operation with a single or. - if (C2->isAllOnesValue()) - return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1)); - - Value *And = Builder.CreateAnd(AShr, *C2 - *C1); - return replaceInstUsesWith(SI, Builder.CreateAdd(And, - ConstantInt::get(And->getType(), *C1))); - } - } - } - } - if (Value *V = foldSelectICmpAnd(SI, ICI, Builder)) return replaceInstUsesWith(SI, V); // NOTE: if we wanted to, this is where to detect integer MIN/MAX - + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); if (CmpRHS != CmpLHS && isa(CmpRHS)) { if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { // Transform (X == C) ? X : Y -> (X == C) ? 
C : Y diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll index d4f1a799f82..da53c62b054 100644 --- a/test/Transforms/InstCombine/rem.ll +++ b/test/Transforms/InstCombine/rem.ll @@ -354,12 +354,11 @@ define i32 @test17(i32 %X) { define i32 @test18(i16 %x, i32 %y) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[X:%.*]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 32 -; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP2]], 63 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[Y:%.*]] -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 63, i32 31 +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[TMP4]] ; %1 = and i16 %x, 4 %2 = icmp ne i16 %1, 0 diff --git a/test/Transforms/InstCombine/select-icmp-and.ll b/test/Transforms/InstCombine/select-icmp-and.ll index 7f3f3e44cb5..306f1384533 100644 --- a/test/Transforms/InstCombine/select-icmp-and.ll +++ b/test/Transforms/InstCombine/select-icmp-and.ll @@ -29,10 +29,9 @@ define i1023 @test6(i1023 %X) { define i32 @test35(i32 %x) { ; CHECK-LABEL: @test35( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 40 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 60 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 60, i32 100 +; CHECK-NEXT: ret i32 [[COND]] ; %cmp = icmp sge i32 %x, 0 %cond = select i1 %cmp, i32 60, i32 100 @@ -41,10 +40,9 @@ define i32 @test35(i32 %x) { define <2 x i32> @test35vec(<2 x i32> %x) { ; CHECK-LABEL: @test35vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <2 x i32> [[TMP2]], -; 
CHECK-NEXT: ret <2 x i32> [[TMP3]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp sge <2 x i32> %x, %cond = select <2 x i1> %cmp, <2 x i32> , <2 x i32> @@ -55,10 +53,9 @@ define <2 x i32> @test35vec(<2 x i32> %x) { define i32 @test35_with_trunc(i64 %x) { ; CHECK-LABEL: @test35_with_trunc( ; CHECK-NEXT: [[X1:%.*]] = trunc i64 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X1]], 31 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 40 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 60 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X1]], -1 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 60, i32 100 +; CHECK-NEXT: ret i32 [[COND]] ; %x1 = trunc i64 %x to i32 %cmp = icmp sge i32 %x1, 0 @@ -68,10 +65,9 @@ define i32 @test35_with_trunc(i64 %x) { define i32 @test36(i32 %x) { ; CHECK-LABEL: @test36( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -40 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], 100 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 60, i32 100 +; CHECK-NEXT: ret i32 [[COND]] ; %cmp = icmp slt i32 %x, 0 %cond = select i1 %cmp, i32 60, i32 100 @@ -80,10 +76,9 @@ define i32 @test36(i32 %x) { define <2 x i32> @test36vec(<2 x i32> %x) { ; CHECK-LABEL: @test36vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], -; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp slt <2 x i32> %x, %cond = select <2 x i1> %cmp, <2 x i32> , <2 
x i32> @@ -92,9 +87,9 @@ define <2 x i32> @test36vec(<2 x i32> %x) { define i32 @test37(i32 %x) { ; CHECK-LABEL: @test37( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 1, i32 -1 +; CHECK-NEXT: ret i32 [[COND]] ; %cmp = icmp sgt i32 %x, -1 %cond = select i1 %cmp, i32 1, i32 -1 @@ -103,9 +98,9 @@ define i32 @test37(i32 %x) { define <2 x i32> @test37vec(<2 x i32> %x) { ; CHECK-LABEL: @test37vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp sgt <2 x i32> %x, %cond = select <2 x i1> %cmp, <2 x i32> , <2 x i32> @@ -114,11 +109,10 @@ define <2 x i32> @test37vec(<2 x i32> %x) { define i32 @test65(i64 %x) { ; CHECK-LABEL: @test65( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP3]], 42 -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40 +; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i64 %x, 16 %2 = icmp ne i64 %1, 0 @@ -128,11 +122,10 @@ define i32 @test65(i64 %x) { define <2 x i32> @test65vec(<2 x i64> %x) { ; CHECK-LABEL: @test65vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], -; CHECK-NEXT: ret <2 x i32> [[TMP4]] 
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i64> %x, %2 = icmp ne <2 x i64> %1, zeroinitializer @@ -142,11 +135,10 @@ define <2 x i32> @test65vec(<2 x i64> %x) { define i32 @test66(i64 %x) { ; CHECK-LABEL: @test66( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 31 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP3]], 42 -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40 +; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i64 %x, 4294967296 %2 = icmp ne i64 %1, 0 @@ -156,11 +148,10 @@ define i32 @test66(i64 %x) { define <2 x i32> @test66vec(<2 x i64> %x) { ; CHECK-LABEL: @test66vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], -; CHECK-NEXT: ret <2 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i64> %x, %2 = icmp ne <2 x i64> %1, zeroinitializer @@ -184,11 +175,10 @@ define <2 x i32> @test66vec_scalar_and(i64 %x) { define i32 @test67(i16 %x) { ; CHECK-LABEL: @test67( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = xor i16 [[TMP2]], 42 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 -; CHECK-NEXT: ret i32 
[[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40 +; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i16 %x, 4 %2 = icmp ne i16 %1, 0 @@ -198,11 +188,10 @@ define i32 @test67(i16 %x) { define <2 x i32> @test67vec(<2 x i16> %x) { ; CHECK-LABEL: @test67vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i16> %x, %2 = icmp ne <2 x i16> %1, zeroinitializer @@ -212,9 +201,9 @@ define <2 x i32> @test67vec(<2 x i16> %x) { define i32 @test71(i32 %x) { ; CHECK-LABEL: @test71( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 42 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40 ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i32 %x, 128 @@ -225,9 +214,9 @@ define i32 @test71(i32 %x) { define <2 x i32> @test71vec(<2 x i32> %x) { ; CHECK-LABEL: @test71vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x 
i32> %x, @@ -238,9 +227,9 @@ define <2 x i32> @test71vec(<2 x i32> %x) { define i32 @test72(i32 %x) { ; CHECK-LABEL: @test72( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], 40 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42 ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = and i32 %x, 128 @@ -251,9 +240,9 @@ define i32 @test72(i32 %x) { define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i32> %x, @@ -264,9 +253,9 @@ define <2 x i32> @test72vec(<2 x i32> %x) { define i32 @test73(i32 %x) { ; CHECK-LABEL: @test73( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], 40 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[TMP1]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42 ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = trunc i32 %x to i8 @@ -277,9 +266,9 @@ define i32 @test73(i32 %x) { define <2 x i32> @test73vec(<2 x i32> %x) { ; CHECK-LABEL: @test73vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = 
select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = trunc <2 x i32> %x to <2 x i8> @@ -290,10 +279,9 @@ define <2 x i32> @test73vec(<2 x i32> %x) { define i32 @test74(i32 %x) { ; CHECK-LABEL: @test74( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], 40 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 40, i32 42 +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = icmp sgt i32 %x, -1 %2 = select i1 %1, i32 40, i32 42 @@ -302,10 +290,9 @@ define i32 @test74(i32 %x) { define <2 x i32> @test74vec(<2 x i32> %x) { ; CHECK-LABEL: @test74vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], -; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = icmp sgt <2 x i32> %x, %2 = select <2 x i1> %1, <2 x i32> , <2 x i32> @@ -403,9 +390,9 @@ define i32 @test15g(i32 %X) { ;; (a & 8) ? -9 : -1 define i32 @test15h(i32 %X) { ; CHECK-LABEL: @test15h( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -9 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 8 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T1]], -1 +; CHECK-NEXT: ret i32 [[TMP1]] ; %t1 = and i32 %X, 8 %t2 = icmp ne i32 %t1, 0 @@ -416,11 +403,10 @@ define i32 @test15h(i32 %X) { ;; (a & 2) ? 
577 : 1089 define i32 @test15i(i32 %X) { ; CHECK-LABEL: @test15i( -; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], 8 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T1]], 512 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 512 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 577 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 2 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i32 1089, i32 577 +; CHECK-NEXT: ret i32 [[T3]] ; %t1 = and i32 %X, 2 %t2 = icmp ne i32 %t1, 0 @@ -431,10 +417,10 @@ define i32 @test15i(i32 %X) { ;; (a & 2) ? 1089 : 577 define i32 @test15j(i32 %X) { ; CHECK-LABEL: @test15j( -; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], 8 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T1]], 512 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 577 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 2 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i32 577, i32 1089 +; CHECK-NEXT: ret i32 [[T3]] ; %t1 = and i32 %X, 2 %t2 = icmp ne i32 %t1, 0 @@ -521,7 +507,7 @@ define i32 @set_to_clear(i32 %x) { define i8 @clear_to_set_decomposebittest(i8 %x) { ; CHECK-LABEL: @clear_to_set_decomposebittest( ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -125 +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -125 ; CHECK-NEXT: ret i8 [[TMP2]] ; %t2 = icmp sgt i8 %x, -1 @@ -560,7 +546,7 @@ define i8 @set_to_set_decomposebittest(i8 %x) { define i8 @set_to_clear_decomposebittest(i8 %x) { ; CHECK-LABEL: @set_to_clear_decomposebittest( ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -125 +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -125 ; CHECK-NEXT: ret i8 [[TMP2]] ; %t2 = icmp slt i8 %x, 0 @@ -574,10 +560,9 @@ define i8 @set_to_clear_decomposebittest(i8 %x) { define i8 @clear_to_set_decomposebittest_extra_use(i8 %x) { ; CHECK-LABEL: 
@clear_to_set_decomposebittest_extra_use( ; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -125 +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 -125, i8 3 ; CHECK-NEXT: call void @use1(i1 [[T2]]) -; CHECK-NEXT: ret i8 [[TMP2]] +; CHECK-NEXT: ret i8 [[T3]] ; %t2 = icmp sgt i8 %x, -1 %t3 = select i1 %t2, i8 131, i8 3 @@ -591,10 +576,9 @@ define i8 @clear_to_set_decomposebittest_extra_use(i8 %x) { define i8 @clear_to_clear_decomposebittest_extra_use(i8 %x) { ; CHECK-LABEL: @clear_to_clear_decomposebittest_extra_use( ; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 3 +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 3, i8 -125 ; CHECK-NEXT: call void @use1(i1 [[T2]]) -; CHECK-NEXT: ret i8 [[TMP2]] +; CHECK-NEXT: ret i8 [[T3]] ; %t2 = icmp sgt i8 %x, -1 %t3 = select i1 %t2, i8 3, i8 131 @@ -608,10 +592,9 @@ define i8 @clear_to_clear_decomposebittest_extra_use(i8 %x) { define i8 @set_to_set_decomposebittest_extra_use(i8 %x) { ; CHECK-LABEL: @set_to_set_decomposebittest_extra_use( ; CHECK-NEXT: [[T2:%.*]] = icmp slt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 3 +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 -125, i8 3 ; CHECK-NEXT: call void @use1(i1 [[T2]]) -; CHECK-NEXT: ret i8 [[TMP2]] +; CHECK-NEXT: ret i8 [[T3]] ; %t2 = icmp slt i8 %x, 0 %t3 = select i1 %t2, i8 131, i8 3 @@ -625,10 +608,9 @@ define i8 @set_to_set_decomposebittest_extra_use(i8 %x) { define i8 @set_to_clear_decomposebittest_extra_use(i8 %x) { ; CHECK-LABEL: @set_to_clear_decomposebittest_extra_use( ; CHECK-NEXT: [[T2:%.*]] = icmp slt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -125 +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 3, i8 -125 ; CHECK-NEXT: call void @use1(i1 
[[T2]]) -; CHECK-NEXT: ret i8 [[TMP2]] +; CHECK-NEXT: ret i8 [[T3]] ; %t2 = icmp slt i8 %x, 0 %t3 = select i1 %t2, i8 3, i8 131 diff --git a/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll b/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll index aa82eca522f..551efa7078a 100644 --- a/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll +++ b/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll @@ -43,14 +43,10 @@ exit: define i32 @compare_against_zero(i32 %x) { ; CHECK-LABEL: @compare_against_zero( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[X]], 31 -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], 1 -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP1]], i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[SELECT2]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] ; CHECK: callfoo: -; CHECK-NEXT: call void @foo(i32 [[SELECT2]]) +; CHECK-NEXT: call void @foo(i32 1) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 42 @@ -273,15 +269,10 @@ exit: define i32 @compare_against_zero_non_idiomatic_add(i32 %x) { ; CHECK-LABEL: @compare_against_zero_non_idiomatic_add( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[X]], 31 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], -431 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], 425 -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP1]], i32 0, i32 [[TMP2]] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[SELECT2]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] ; CHECK: callfoo: -; CHECK-NEXT: call void 
@foo(i32 [[SELECT2]]) +; CHECK-NEXT: call void @foo(i32 425) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 42 @@ -336,15 +327,10 @@ exit: define i32 @compare_against_zero_non_idiomatic_or(i32 %x) { ; CHECK-LABEL: @compare_against_zero_non_idiomatic_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[X]], 31 -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], -430 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 425 -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP1]], i32 0, i32 [[TMP2]] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[SELECT2]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] ; CHECK: callfoo: -; CHECK-NEXT: call void @foo(i32 [[SELECT2]]) +; CHECK-NEXT: call void @foo(i32 425) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 42 @@ -402,17 +388,10 @@ exit: define i32 @compare_against_zero_type_mismatch_idiomatic(i64 %x) { ; CHECK-LABEL: @compare_against_zero_type_mismatch_idiomatic( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[X]], 62 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[TMP3]], -1 -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP1]], i32 0, i32 [[TMP4]] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[SELECT2]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] ; CHECK: callfoo: -; CHECK-NEXT: call void @foo(i32 [[SELECT2]]) +; CHECK-NEXT: call void @foo(i32 1) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; 
CHECK-NEXT: ret i32 42 @@ -437,17 +416,10 @@ exit: define i32 @compare_against_zero_type_mismatch_non_idiomatic_1(i64 %x) { ; CHECK-LABEL: @compare_against_zero_type_mismatch_non_idiomatic_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[X]], 60 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[TMP3]], -7 -; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP1]], i32 0, i32 [[TMP4]] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[SELECT2]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]] ; CHECK: callfoo: -; CHECK-NEXT: call void @foo(i32 [[SELECT2]]) +; CHECK-NEXT: call void @foo(i32 1) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i32 42