1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

[Codegen] TargetLowering::prepareUREMEqFold(): x u% C1 ==/!= C2 (PR35479)

Summary:
The current lowering is:
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n3, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/2xC
https://rise4fun.com/Alive/jpb5

However, we can support non-tautological cases `C1 u> C2` too.
Said handling consists of two parts:
* `C2 u<= (-1 %u C1)`. It just works. We only have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4   iff C2 u<= (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u<= (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/m4P
https://rise4fun.com/Alive/SKrx
* `C2 u> (-1 %u C1)`. We also have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`,
  and we have to decrement C4:
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4   iff C2 u> (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u> (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)-1
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/d40
https://rise4fun.com/Alive/8cF

I believe this concludes `x u% C1 ==/!= C2` lowering.
In fact, clang may now be better in this regard than gcc:
as it can be seen from `@t32_6_4` test, we do lower `x % 6 == 4`
via this pattern, while gcc does not: https://godbolt.org/z/XNU2z9
And all the general alive proofs say this is legal.
And manual checking agrees: https://rise4fun.com/Alive/WA2

Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=35479 | PR35479 ]].

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: nick, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70053
This commit is contained in:
Roman Lebedev 2019-11-22 15:22:42 +03:00
parent e0449de48b
commit 9e97862e1f
5 changed files with 320 additions and 483 deletions

View File

@ -4943,7 +4943,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
SmallVector<SDNode *, 4> Built;
SmallVector<SDNode *, 5> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
@ -4978,6 +4978,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
bool ComparingWithAllZeros = true;
bool AllComparisonsWithNonZerosAreTautological = true;
bool HadTautologicalLanes = false;
bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
@ -4993,6 +4995,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
ComparingWithAllZeros &= Cmp.isNullValue();
// x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
// But we will only be able to produce the comparison that will give the
@ -5000,12 +5004,6 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
bool TautologicalInvertedLane = D.ule(Cmp);
HadTautologicalInvertedLanes |= TautologicalInvertedLane;
// If we are checking that remainder is something smaller than the divisor,
// then this comparison isn't tautological. For now this is not handled,
// other than the comparison that remainder is zero.
if (!Cmp.isNullValue() && !TautologicalInvertedLane)
return false;
// If all lanes are tautological (either all divisors are ones, or divisor
// is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
@ -5013,6 +5011,12 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
// If we are comparing with non-zero, we need'll need to subtract said
// comparison value from the LHS. But there is no point in doing that if
// every lane where we are comparing with non-zero is tautological..
if (!Cmp.isNullValue())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
@ -5033,8 +5037,15 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) / D)
APInt Q = APInt::getAllOnesValue(W).udiv(D);
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
@ -5093,6 +5104,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
QVal = QAmts[0];
}
if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
if (!isOperationLegalOrCustom(ISD::SUB, VT))
return SDValue(); // FIXME: Could/should use `ISD::ADD`?
assert(CompTargetNode.getValueType() == N.getValueType() &&
"Expecting that the types on LHS and RHS of comparisons match.");
N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
}
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());

View File

@ -6,12 +6,10 @@ define i1 @t32_3_1(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #33
; CHECK-NEXT: add w8, w8, w8, lsl #1
; CHECK-NEXT: sub w8, w0, w8
; CHECK-NEXT: cmp w8, #1 // =1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 1
@ -23,12 +21,11 @@ define i1 @t32_3_2(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #33
; CHECK-NEXT: add w8, w8, w8, lsl #1
; CHECK-NEXT: sub w8, w0, w8
; CHECK-NEXT: cmp w8, #2 // =2
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #-1431655766
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 2
@ -41,12 +38,10 @@ define i1 @t32_5_1(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: add w8, w8, w8, lsl #2
; CHECK-NEXT: sub w8, w0, w8
; CHECK-NEXT: cmp w8, #1 // =1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 1
@ -58,12 +53,11 @@ define i1 @t32_5_2(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: add w8, w8, w8, lsl #2
; CHECK-NEXT: sub w8, w0, w8
; CHECK-NEXT: cmp w8, #2 // =2
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #1717986918
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 2
@ -75,12 +69,11 @@ define i1 @t32_5_3(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: add w8, w8, w8, lsl #2
; CHECK-NEXT: sub w8, w0, w8
; CHECK-NEXT: cmp w8, #3 // =3
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #-1717986919
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 3
@ -92,12 +85,11 @@ define i1 @t32_5_4(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: add w8, w8, w8, lsl #2
; CHECK-NEXT: sub w8, w0, w8
; CHECK-NEXT: cmp w8, #4 // =4
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #-858993460
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 4
@ -110,12 +102,13 @@ define i1 @t32_6_1(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: mov w9, #6
; CHECK-NEXT: msub w8, w8, w9, w0
; CHECK-NEXT: cmp w8, #1 // =1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 1
@ -127,12 +120,13 @@ define i1 @t32_6_2(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: mov w9, #6
; CHECK-NEXT: msub w8, w8, w9, w0
; CHECK-NEXT: cmp w8, #2 // =2
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w9, #-1431655766
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 2
@ -144,12 +138,13 @@ define i1 @t32_6_3(i32 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: mov w9, #6
; CHECK-NEXT: msub w8, w8, w9, w0
; CHECK-NEXT: cmp w8, #3 // =3
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mul w8, w0, w8
; CHECK-NEXT: sub w8, w8, #1 // =1
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 3
@ -160,13 +155,15 @@ define i1 @t32_6_4(i32 %X) nounwind {
; CHECK-LABEL: t32_6_4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: mov w9, #21844
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: mov w9, #6
; CHECK-NEXT: msub w8, w8, w9, w0
; CHECK-NEXT: cmp w8, #4 // =4
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: movk w9, #21845, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43690
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 4
@ -177,13 +174,15 @@ define i1 @t32_6_5(i32 %X) nounwind {
; CHECK-LABEL: t32_6_5:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: mov w9, #43689
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #34
; CHECK-NEXT: mov w9, #6
; CHECK-NEXT: msub w8, w8, w9, w0
; CHECK-NEXT: cmp w8, #5 // =5
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43690
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 5
@ -199,12 +198,11 @@ define i1 @t16_3_2(i16 %X) nounwind {
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: umull x9, w8, w9
; CHECK-NEXT: lsr x9, x9, #33
; CHECK-NEXT: add w9, w9, w9, lsl #1
; CHECK-NEXT: sub w8, w8, w9
; CHECK-NEXT: cmp w8, #2 // =2
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w10, #-1431655766
; CHECK-NEXT: madd w8, w8, w9, w10
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i16 %X, 3
%cmp = icmp eq i16 %urem, 2
@ -217,12 +215,11 @@ define i1 @t8_3_2(i8 %X) nounwind {
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: umull x9, w8, w9
; CHECK-NEXT: lsr x9, x9, #33
; CHECK-NEXT: add w9, w9, w9, lsl #1
; CHECK-NEXT: sub w8, w8, w9
; CHECK-NEXT: cmp w8, #2 // =2
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov w10, #-1431655766
; CHECK-NEXT: madd w8, w8, w9, w10
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i8 %X, 3
%cmp = icmp eq i8 %urem, 2
@ -234,12 +231,11 @@ define i1 @t64_3_2(i64 %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: movk x8, #43691
; CHECK-NEXT: umulh x8, x0, x8
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: add x8, x8, x8, lsl #1
; CHECK-NEXT: sub x8, x0, x8
; CHECK-NEXT: cmp x8, #2 // =2
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: mov x9, #-6148914691236517206
; CHECK-NEXT: madd x8, x0, x8, x9
; CHECK-NEXT: mov x9, #6148914691236517205
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i64 %X, 3
%cmp = icmp eq i64 %urem, 2

View File

@ -4,18 +4,16 @@
define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
; CHECK-LABEL: t32_3:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1]
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: adrp x9, .LCPI0_0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_0]
; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ushr v1.4s, v1.4s, #1
; CHECK-NEXT: movi v3.4s, #3
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
@ -26,18 +24,17 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
; CHECK-LABEL: t32_5:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: adrp x9, .LCPI1_0
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_0]
; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ushr v1.4s, v1.4s, #2
; CHECK-NEXT: movi v3.4s, #5
; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mov w9, #13106
; CHECK-NEXT: movk w9, #13107, lsl #16
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
; CHECK-NEXT: dup v1.4s, w9
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>

View File

@ -5,27 +5,18 @@
define i1 @t32_3_1(i32 %X) nounwind {
; X86-LABEL: t32_3_1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $1, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_3_1:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $33, %rcx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 1
@ -35,27 +26,18 @@ define i1 @t32_3_1(i32 %X) nounwind {
define i1 @t32_3_2(i32 %X) nounwind {
; X86-LABEL: t32_3_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $2, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_3_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $33, %rcx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $2, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 2
@ -66,27 +48,18 @@ define i1 @t32_3_2(i32 %X) nounwind {
define i1 @t32_5_1(i32 %X) nounwind {
; X86-LABEL: t32_5_1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl $2, %edx
; X86-NEXT: leal (%edx,%edx,4), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $1, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $858993459, %eax # imm = 0x33333333
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_1:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: leal (%rcx,%rcx,4), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $858993459, %eax # imm = 0x33333333
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 1
@ -96,27 +69,18 @@ define i1 @t32_5_1(i32 %X) nounwind {
define i1 @t32_5_2(i32 %X) nounwind {
; X86-LABEL: t32_5_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl $2, %edx
; X86-NEXT: leal (%edx,%edx,4), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $2, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $1717986918, %eax # imm = 0x66666666
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: leal (%rcx,%rcx,4), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $2, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $1717986918, %eax # imm = 0x66666666
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 2
@ -126,27 +90,18 @@ define i1 @t32_5_2(i32 %X) nounwind {
define i1 @t32_5_3(i32 %X) nounwind {
; X86-LABEL: t32_5_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl $2, %edx
; X86-NEXT: leal (%edx,%edx,4), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $3, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $-1717986919, %eax # imm = 0x99999999
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_3:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: leal (%rcx,%rcx,4), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $3, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $-1717986919, %eax # imm = 0x99999999
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 3
@ -156,27 +111,18 @@ define i1 @t32_5_3(i32 %X) nounwind {
define i1 @t32_5_4(i32 %X) nounwind {
; X86-LABEL: t32_5_4:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl $2, %edx
; X86-NEXT: leal (%edx,%edx,4), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $4, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_4:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: leal (%rcx,%rcx,4), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $4, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 4
@ -187,29 +133,20 @@ define i1 @t32_5_4(i32 %X) nounwind {
define i1 @t32_6_1(i32 %X) nounwind {
; X86-LABEL: t32_6_1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: andl $-2, %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $1, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_1:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: addl %ecx, %ecx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 1
@ -219,29 +156,20 @@ define i1 @t32_6_1(i32 %X) nounwind {
define i1 @t32_6_2(i32 %X) nounwind {
; X86-LABEL: t32_6_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: andl $-2, %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $2, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: addl %ecx, %ecx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $2, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 2
@ -251,29 +179,20 @@ define i1 @t32_6_2(i32 %X) nounwind {
define i1 @t32_6_3(i32 %X) nounwind {
; X86-LABEL: t32_6_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: andl $-2, %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $3, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: decl %eax
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_3:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: addl %ecx, %ecx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $3, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: decl %eax
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 3
@ -283,29 +202,20 @@ define i1 @t32_6_3(i32 %X) nounwind {
define i1 @t32_6_4(i32 %X) nounwind {
; X86-LABEL: t32_6_4:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: andl $-2, %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $4, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655764, %eax # imm = 0x55555554
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_4:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: addl %ecx, %ecx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $4, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655764, %eax # imm = 0x55555554
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 4
@ -315,29 +225,20 @@ define i1 @t32_6_4(i32 %X) nounwind {
define i1 @t32_6_5(i32 %X) nounwind {
; X86-LABEL: t32_6_5:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl %edx
; X86-NEXT: andl $-2, %edx
; X86-NEXT: leal (%edx,%edx,2), %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: cmpl $5, %ecx
; X86-NEXT: sete %al
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_5:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: shrq $34, %rcx
; X64-NEXT: addl %ecx, %ecx
; X64-NEXT: leal (%rcx,%rcx,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpl $5, %edi
; X64-NEXT: sete %al
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 5
@ -350,24 +251,20 @@ define i1 @t32_6_5(i32 %X) nounwind {
define i1 @t16_3_2(i16 %X) nounwind {
; X86-LABEL: t16_3_2:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $43691, %eax, %ecx # imm = 0xAAAB
; X86-NEXT: shrl $17, %ecx
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmpw $2, %ax
; X86-NEXT: sete %al
; X86-NEXT: imull $-21845, {{[0-9]+}}(%esp), %eax # imm = 0xAAAB
; X86-NEXT: addl $-21846, %eax # imm = 0xAAAA
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: cmpl $21845, %eax # imm = 0x5555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t16_3_2:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: imull $43691, %eax, %eax # imm = 0xAAAB
; X64-NEXT: shrl $17, %eax
; X64-NEXT: leal (%rax,%rax,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: cmpw $2, %di
; X64-NEXT: sete %al
; X64-NEXT: imull $-21845, %edi, %eax # imm = 0xAAAB
; X64-NEXT: addl $-21846, %eax # imm = 0xAAAA
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: cmpl $21845, %eax # imm = 0x5555
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i16 %X, 3
%cmp = icmp eq i16 %urem, 2
@ -377,24 +274,18 @@ define i1 @t16_3_2(i16 %X) nounwind {
define i1 @t8_3_2(i8 %X) nounwind {
; X86-LABEL: t8_3_2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $171, %eax, %ecx
; X86-NEXT: shrl $9, %ecx
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
; X86-NEXT: subb %cl, %al
; X86-NEXT: cmpb $2, %al
; X86-NEXT: sete %al
; X86-NEXT: imull $-85, {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb $-86, %al
; X86-NEXT: cmpb $85, %al
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t8_3_2:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: imull $171, %eax, %ecx
; X64-NEXT: shrl $9, %ecx
; X64-NEXT: leal (%rcx,%rcx,2), %ecx
; X64-NEXT: subb %cl, %al
; X64-NEXT: cmpb $2, %al
; X64-NEXT: sete %al
; X64-NEXT: imull $-85, %edi, %eax
; X64-NEXT: addb $-86, %al
; X64-NEXT: cmpb $85, %al
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i8 %X, 3
%cmp = icmp eq i8 %urem, 2
@ -419,14 +310,13 @@ define i1 @t64_3_2(i64 %X) nounwind {
;
; X64-LABEL: t64_3_2:
; X64: # %bb.0:
; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %rcx
; X64-NEXT: shrq %rdx
; X64-NEXT: leaq (%rdx,%rdx,2), %rax
; X64-NEXT: subq %rax, %rdi
; X64-NEXT: cmpq $2, %rdi
; X64-NEXT: sete %al
; X64-NEXT: movabsq $-6148914691236517205, %rax # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: imulq %rdi, %rax
; X64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: addq %rax, %rcx
; X64-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; X64-NEXT: cmpq %rax, %rcx
; X64-NEXT: setb %al
; X64-NEXT: retq
%urem = urem i64 %X, 3
%cmp = icmp eq i64 %urem, 2

View File

@ -8,77 +8,52 @@
define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: t32_3:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: psubd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrld $1, %xmm2
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm3
; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm3
; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: t32_3:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $1, %xmm2
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT: psubd {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,1431655764,1431655764,1431655764]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: t32_3:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: t32_3:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: t32_3:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
%urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
@ -88,77 +63,53 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: t32_5:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: psubd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrld $2, %xmm2
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT: pslld $2, %xmm1
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm3
; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm3
; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: t32_5:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $2, %xmm2
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT: psubd {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,858993458,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: t32_5:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: t32_5:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [858993458,858993458,858993458,858993458]
; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: t32_5:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
%urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4>
@ -233,16 +184,11 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: t32_6_part0:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
@ -317,16 +263,11 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: t32_6_part1:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
%cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0>
@ -415,18 +356,12 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: t32_tautological:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-AVX512VL-NEXT: retq
%urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>