1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 11:33:24 +02:00
llvm-mirror/test/CodeGen/AArch64/urem-seteq-nonzero.ll
Roman Lebedev 9e97862e1f [Codegen] TargetLowering::prepareUREMEqFold(): x u% C1 ==/!= C2 (PR35479)
Summary:
The current lowering is:
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n3, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/2xC
https://rise4fun.com/Alive/jpb5

However, we can support non-tautological cases `C1 u> C2` too.
Said handling consists of two parts:
* `C2 u<= (-1 %u C1)`. It just works. We only have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4   iff C2 u<= (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u<= (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/m4P
https://rise4fun.com/Alive/SKrx
* `C2 u> (-1 %u C1)`. We also have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`,
  and we have to decrement C4:
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4   iff C2 u> (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u> (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)-1
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/d40
https://rise4fun.com/Alive/8cF

I believe this concludes `x u% C1 ==/!= C2` lowering.
In fact, clang may now be better in this regard than gcc:
as it can be seen from `@t32_6_4` test, we do lower `x % 6 == 4`
via this pattern, while gcc does not: https://godbolt.org/z/XNU2z9
And all the general alive proofs say this is legal.
And manual checking agrees: https://rise4fun.com/Alive/WA2

Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=35479 | PR35479 ]].

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: nick, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70053
2019-11-22 15:22:42 +03:00

244 lines
6.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
; i32 case: X u% 3 == 1.
; The expected code contains no division: X is multiplied by 0xAAAAAAAB
; (mov #43691 + movk #43690) with 0x55555555 added by the same madd, and the
; result is compared unsigned-lower (cset lo) against 0x55555555. w9 serves as
; both the addend and the comparison bound.
define i1 @t32_3_1(i32 %X) nounwind {
; CHECK-LABEL: t32_3_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 1
ret i1 %cmp
}
; i32 case: X u% 3 == 2.
; The expected code contains no division: X is multiplied by 0xAAAAAAAB
; (mov #43691 + movk #43690) with 0xAAAAAAAA (#-1431655766) added by the madd.
; w9 is then reloaded with the bound 0x55555555 and the result is compared
; unsigned-lower (cset lo) against it.
define i1 @t32_3_2(i32 %X) nounwind {
; CHECK-LABEL: t32_3_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mov w9, #-1431655766
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 2
ret i1 %cmp
}
; i32 case: X u% 5 == 1.
; The expected code contains no division: X is multiplied by 0xCCCCCCCD
; (mov #52429 + movk #52428) with 0x33333333 added by the same madd, and the
; result is compared unsigned-lower (cset lo) against 0x33333333. w9 serves as
; both the addend and the comparison bound.
define i1 @t32_5_1(i32 %X) nounwind {
; CHECK-LABEL: t32_5_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 1
ret i1 %cmp
}
; i32 case: X u% 5 == 2.
; The expected code contains no division: X is multiplied by 0xCCCCCCCD
; (mov #52429 + movk #52428) with 0x66666666 (#1717986918) added by the madd.
; w9 is then reloaded with the bound 0x33333333 and the result is compared
; unsigned-lower (cset lo) against it.
define i1 @t32_5_2(i32 %X) nounwind {
; CHECK-LABEL: t32_5_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: mov w9, #1717986918
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 2
ret i1 %cmp
}
; i32 case: X u% 5 == 3.
; The expected code contains no division: X is multiplied by 0xCCCCCCCD
; (mov #52429 + movk #52428) with 0x99999999 (#-1717986919) added by the madd.
; w9 is then reloaded with the bound 0x33333333 and the result is compared
; unsigned-lower (cset lo) against it.
define i1 @t32_5_3(i32 %X) nounwind {
; CHECK-LABEL: t32_5_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: mov w9, #-1717986919
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 3
ret i1 %cmp
}
; i32 case: X u% 5 == 4.
; The expected code contains no division: X is multiplied by 0xCCCCCCCD
; (mov #52429 + movk #52428) with 0xCCCCCCCC (#-858993460) added by the madd.
; w9 is then reloaded with the bound 0x33333333 and the result is compared
; unsigned-lower (cset lo) against it.
define i1 @t32_5_4(i32 %X) nounwind {
; CHECK-LABEL: t32_5_4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: mov w9, #-858993460
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #858993459
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 4
ret i1 %cmp
}
; i32 case with an even divisor: X u% 6 == 1.
; The expected code contains no division: X is multiplied by 0xAAAAAAAB
; (mov #43691 + movk #43690) with 0x55555555 added by the madd, the result is
; rotated right by one bit (ror #1), and then compared unsigned-lower (cset lo)
; against 0x2AAAAAAB (mov #43691 + movk #10922).
define i1 @t32_6_1(i32 %X) nounwind {
; CHECK-LABEL: t32_6_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 1
ret i1 %cmp
}
; i32 case with an even divisor: X u% 6 == 2.
; The expected code contains no division: X is multiplied by 0xAAAAAAAB
; (mov #43691 + movk #43690) with 0xAAAAAAAA (#-1431655766) added by the madd,
; the result is rotated right by one bit (ror #1), and then compared
; unsigned-lower (cset lo) against 0x2AAAAAAB (mov #43691 + movk #10922).
define i1 @t32_6_2(i32 %X) nounwind {
; CHECK-LABEL: t32_6_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mov w9, #-1431655766
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 3
ret i1 %cmp
}
; i32 case with an even divisor: X u% 6 == 3.
; The expected code contains no division. Unlike the madd form, here a plain
; mul by 0xAAAAAAAB (mov #43691 + movk #43690) is followed by a separate
; `sub #1`. The result is rotated right by one bit (ror #1) and compared
; unsigned-lower (cset lo) against 0x2AAAAAAB (mov #43691 + movk #10922).
define i1 @t32_6_3(i32 %X) nounwind {
; CHECK-LABEL: t32_6_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mul w8, w0, w8
; CHECK-NEXT: sub w8, w8, #1 // =1
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 3
ret i1 %cmp
}
; i32 case with an even divisor: X u% 6 == 4.
; The expected code contains no division: X is multiplied by 0xAAAAAAAB
; (mov #43691 + movk #43690) with 0x55555554 (mov #21844 + movk #21845) added
; by the madd, and the result is rotated right by one bit (ror #1). It is then
; compared unsigned-lower (cset lo) against 0x2AAAAAAA (mov #43690 +
; movk #10922); note the low half of the bound is #43690 here, not #43691.
define i1 @t32_6_4(i32 %X) nounwind {
; CHECK-LABEL: t32_6_4:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: mov w9, #21844
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: movk w9, #21845, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43690
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 4
ret i1 %cmp
}
; i32 case with an even divisor: X u% 6 == 5.
; The expected code contains no division: X is multiplied by 0xAAAAAAAB
; (mov #43691 + movk #43690) with 0xAAAAAAA9 (mov #43689 + movk #43690) added
; by the madd, and the result is rotated right by one bit (ror #1). It is then
; compared unsigned-lower (cset lo) against 0x2AAAAAAA (mov #43690 +
; movk #10922).
define i1 @t32_6_5(i32 %X) nounwind {
; CHECK-LABEL: t32_6_5:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: mov w9, #43689
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: madd w8, w0, w8, w9
; CHECK-NEXT: mov w9, #43690
; CHECK-NEXT: ror w8, w8, #1
; CHECK-NEXT: movk w9, #10922, lsl #16
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 5
ret i1 %cmp
}
;-------------------------------------------------------------------------------
; Other widths.
; i16 case: X u% 3 == 2.
; The expected code first masks the argument to its low 16 bits
; (and #0xffff), then works in 32-bit registers: a madd with multiplier
; 0xAAAAAAAB (mov #43691 + movk #43690) and addend 0xAAAAAAAA
; (#-1431655766), followed by an unsigned-lower compare (cset lo) against
; 0x55555555. No division is expected.
define i1 @t16_3_2(i16 %X) nounwind {
; CHECK-LABEL: t16_3_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: mov w10, #-1431655766
; CHECK-NEXT: madd w8, w8, w9, w10
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i16 %X, 3
%cmp = icmp eq i16 %urem, 2
ret i1 %cmp
}
; i8 case: X u% 3 == 2.
; The expected code first masks the argument to its low 8 bits (and #0xff),
; then works in 32-bit registers: a madd with multiplier 0xAAAAAAAB
; (mov #43691 + movk #43690) and addend 0xAAAAAAAA (#-1431655766), followed by
; an unsigned-lower compare (cset lo) against 0x55555555. No division is
; expected.
define i1 @t8_3_2(i8 %X) nounwind {
; CHECK-LABEL: t8_3_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: mov w10, #-1431655766
; CHECK-NEXT: madd w8, w8, w9, w10
; CHECK-NEXT: mov w9, #1431655765
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i8 %X, 3
%cmp = icmp eq i8 %urem, 2
ret i1 %cmp
}
; i64 case: X u% 3 == 2.
; The expected code uses 64-bit registers and contains no division. x8 is
; loaded with 0xAAAAAAAAAAAAAAAA and its low 16 bits are patched to 0xAAAB
; (movk #43691), giving the multiplier 0xAAAAAAAAAAAAAAAB. The madd adds
; 0xAAAAAAAAAAAAAAAA, and the result is compared unsigned-lower (cset lo)
; against 0x5555555555555555.
define i1 @t64_3_2(i64 %X) nounwind {
; CHECK-LABEL: t64_3_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: movk x8, #43691
; CHECK-NEXT: mov x9, #-6148914691236517206
; CHECK-NEXT: madd x8, x0, x8, x9
; CHECK-NEXT: mov x9, #6148914691236517205
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%urem = urem i64 %X, 3
%cmp = icmp eq i64 %urem, 2
ret i1 %cmp
}