[InstCombine] Recommit: Shift amount reassociation: shl-trunc-shl pattern
This was initially committed in r368059 but got reverted in r368084 because the logic for handling a bit-width mismatch between the shift amounts was faulty (the mismatch simply wasn't being handled). I've added an explicit bailout before we call SimplifyAddInst(); I don't think it is designed, in general, to handle differently-typed values, even though the actual problem only comes from ConstantExprs. I have also changed the common type deduction to not just blindly look past zext, but to only do so when that makes the types match in the end.

Differential Revision: https://reviews.llvm.org/D65380

llvm-svn: 368141
This commit is contained in:
parent d4d1331456
commit 3c5960b14a
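For a concrete sense of the transform, a minimal sketch with invented value names (not taken from the patch's tests): given two left shifts by constants separated by a truncation,

  %t0 = shl i32 %x, 6
  %t1 = trunc i32 %t0 to i16
  %t2 = shl i16 %t1, 2

the shift amounts reassociate into a single wide shift followed by the truncation, since 6 + 2 = 8 is still smaller than the 32-bit width of the inner shift:

  %n0 = shl i32 %x, 8
  %t2 = trunc i32 %n0 to i16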
include/llvm/IR/PatternMatch.h
@@ -1270,6 +1270,12 @@ inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::ZExt>(Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
+m_ZExtOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_ZExt(Op), Op);
+}
+
 template <typename OpTy>
 inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
                         CastClass_match<OpTy, Instruction::SExt>>
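The new m_ZExtOrSelf matcher is what lets both shift amounts be matched uniformly whether or not they were widened. A hedged sketch of the two IR shapes it accepts for a shift amount (hypothetical IR; %amt, %amt32 and %x are invented names):

  ; ZExt arm: the amount is a zext of a narrower value
  %wide = zext i16 %amt to i32
  %s0 = shl i32 %x, %wide
  ; self arm: the amount is used directly
  %s1 = shl i32 %x, %amt32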
lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -27,42 +27,90 @@ using namespace PatternMatch;
 // This is valid for any shift, but they must be identical.
 static Instruction *
 reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
-                                             const SimplifyQuery &SQ) {
-  // Look for:  (x shiftopcode ShAmt0) shiftopcode ShAmt1
-  Value *X, *ShAmt1, *ShAmt0;
+                                             const SimplifyQuery &SQ,
+                                             InstCombiner::BuilderTy &Builder) {
+  // Look for a shift of some instruction, ignore zext of shift amount if any.
+  Instruction *Sh0Op0;
+  Value *ShAmt0;
+  if (!match(Sh0,
+             m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
+    return nullptr;
+
+  // If there is a truncation between the two shifts, we must make note of it
+  // and look through it. The truncation imposes additional constraints on the
+  // transform.
   Instruction *Sh1;
-  if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
-                                       m_Instruction(Sh1)),
-                          m_Value(ShAmt0))))
+  Value *Trunc = nullptr;
+  match(Sh0Op0,
+        m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)),
+                    m_Instruction(Sh1)));
+
+  // Inner shift: (x shiftopcode ShAmt1)
+  // Like with other shift, ignore zext of shift amount if any.
+  Value *X, *ShAmt1;
+  if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
+    return nullptr;
+
+  // We have two shift amounts from two different shifts. The types of those
+  // shift amounts may not match. If that's the case let's bailout now..
+  if (ShAmt0->getType() != ShAmt1->getType())
     return nullptr;
 
   // The shift opcodes must be identical.
   Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
   if (ShiftOpcode != Sh1->getOpcode())
     return nullptr;
+
+  // Did we match a pattern with truncation ?
+  if (Trunc) {
+    // For right-shifts we can't do any such simplifications. Leave as-is.
+    if (ShiftOpcode != Instruction::BinaryOps::Shl)
+      return nullptr; // FIXME: still could perform constant-folding.
+    // If we saw truncation, we'll need to produce extra instruction,
+    // and for that one of the operands of the shift must be one-use.
+    if (!match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+      return nullptr;
+  }
+
   // Can we fold (ShAmt0+ShAmt1) ?
-  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
-                                  SQ.getWithInstruction(Sh0));
+  auto *NewShAmt = dyn_cast_or_null<Constant>(
+      SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
+                      SQ.getWithInstruction(Sh0)));
   if (!NewShAmt)
     return nullptr; // Did not simplify.
-  // Is the new shift amount smaller than the bit width?
-  // FIXME: could also rely on ConstantRange.
-  unsigned BitWidth = X->getType()->getScalarSizeInBits();
-  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
-                                          APInt(BitWidth, BitWidth))))
-    return nullptr;
+  // Is the new shift amount smaller than the bit width of inner shift?
+  if (!match(NewShAmt, m_SpecificInt_ICMP(
+                           ICmpInst::Predicate::ICMP_ULT,
+                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
+                                 X->getType()->getScalarSizeInBits()))))
+    return nullptr; // FIXME: could perform constant-folding.
+
   // All good, we can do this fold.
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
+
   BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
-  // If both of the original shifts had the same flag set, preserve the flag.
-  if (ShiftOpcode == Instruction::BinaryOps::Shl) {
-    NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
-                                   Sh1->hasNoUnsignedWrap());
-    NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
-                                 Sh1->hasNoSignedWrap());
-  } else {
-    NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+
+  // The flags can only be propagated if there wasn't a trunc.
+  if (!Trunc) {
+    // If the pattern did not involve trunc, and both of the original shifts
+    // had the same flag set, preserve the flag.
+    if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+      NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+                                     Sh1->hasNoUnsignedWrap());
+      NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+                                   Sh1->hasNoSignedWrap());
+    } else {
+      NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+    }
   }
-  return NewShift;
+
+  Instruction *Ret = NewShift;
+  if (Trunc) {
+    Builder.Insert(NewShift);
+    Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
+  }
+
+  return Ret;
 }
 
 // If we have some pattern that leaves only some low bits set, and then performs
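Note the early bailout when a trunc was matched and the shifts are not shl: for right shifts the fold is simply unsound. A hedged counterexample (hypothetical IR, invented names):

  %t0 = lshr i32 %x, 8
  %t1 = trunc i32 %t0 to i16
  %t2 = lshr i16 %t1, 4

Folding this to trunc(%x lshr 12) would pull bits 24..27 of %x into the high bits of the i16 result, whereas the original sequence leaves those bits zero.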
@@ -158,7 +206,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     return Res;
 
   if (Instruction *NewShift =
-          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ, Builder))
     return NewShift;
 
   // (C1 shift (A add C2)) -> (C1 shift C2) shift A)
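The Builder parameter threaded through here matters for the trunc case: the new wide shift is inserted via the builder, and only the final trunc is returned as the replacement instruction. A rough sketch of the replacement sequence for an i32-to-i16 case (invented names):

  %wide = shl i32 %x, 8             ; NewShift, inserted via Builder.Insert
  %narrow = trunc i32 %wide to i16  ; Ret, returned to replace the outer shl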
test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
@@ -12,12 +12,8 @@
 
 define i16 @t0(i32 %x, i16 %y) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -31,12 +27,8 @@ define i16 @t0(i32 %x, i16 %y) {
 
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -24, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 32>, %y
@@ -50,12 +42,8 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 
 define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 30>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -24, i16 0>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 30>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 30>, %y
@@ -71,12 +59,8 @@ define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 
 define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t3_vec_nonsplat_undef0(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 -24, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -90,12 +74,8 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t4_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 undef, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
@@ -109,12 +89,8 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t5_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 undef, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -137,9 +113,9 @@ define i16 @t6_extrause0(i32 %x, i16 %y) {
 ; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
 ; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
 ; CHECK-NEXT:    call void @use16(i16 [[T3]])
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -154,13 +130,10 @@ define i16 @t6_extrause0(i32 %x, i16 %y) {
 
 define i16 @t7_extrause1(i32 %x, i16 %y) {
 ; CHECK-LABEL: @t7_extrause1(
-; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y:%.*]], -24
 ; CHECK-NEXT:    call void @use16(i16 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -252,3 +225,20 @@ define i16 @n11(i32 %x, i16 %y) {
   %t5 = shl i16 %t3, %t4
   ret i16 %t3
 }
+
+; Bit width mismatch of shift amount
+
+@Y32 = global i32 42
+@Y16 = global i16 42
+define i16 @t01(i32 %x) {
+; CHECK-LABEL: @t01(
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], ptrtoint (i32* @Y32 to i32)
+; CHECK-NEXT:    [[T1:%.*]] = trunc i32 [[T0]] to i16
+; CHECK-NEXT:    [[T2:%.*]] = shl i16 [[T1]], ptrtoint (i16* @Y16 to i16)
+; CHECK-NEXT:    ret i16 [[T2]]
+;
+  %t0 = shl i32 %x, ptrtoint (i32* @Y32 to i32)
+  %t1 = trunc i32 %t0 to i16
+  %t2 = shl i16 %t1, ptrtoint (i16* @Y16 to i16)
+  ret i16 %t2
+}