mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
9e97862e1f
Summary: The current lowering is: ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 available in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 However, we can support non-tautological cases `C1 u> C2` too. Said handling consists of two parts: * `C2 u<= (-1 %u C1)`. It just works. We only have to change `(X % C1) == C2` into `((X - C2) % C1) == 0` ``` Name: (X % C1) == C2 -> (X - C2) * C3 <= C4 iff C2 u<= (-1 %u C1) Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u<= (-1 %u C1) %zz = and i8 C3, 0 ; trick alive into making C3 available in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = (-1 /u C1) %n0 = sub i8 %x, C2 %n1 = mul i8 %n0, C3 %n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right %n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n4 = or i8 %n2, %n3 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n4, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/m4P https://rise4fun.com/Alive/SKrx * `C2 u> (-1 %u C1)`. 
We also have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`, and we have to decrement C4: ``` Name: (X % C1) == C2 -> (X - C2) * C3 <= C4 iff C2 u> (-1 %u C1) Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u> (-1 %u C1) %zz = and i8 C3, 0 ; trick alive into making C3 available in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = (-1 /u C1)-1 %n0 = sub i8 %x, C2 %n1 = mul i8 %n0, C3 %n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right %n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n4 = or i8 %n2, %n3 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n4, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/d40 https://rise4fun.com/Alive/8cF I believe this concludes `x u% C1 ==/!= C2` lowering. In fact, clang may now be better in this regard than gcc: as can be seen from the `@t32_6_4` test, we do lower `x % 6 == 4` via this pattern, while gcc does not: https://godbolt.org/z/XNU2z9 And all the general alive proofs say this is legal. And manual checking agrees: https://rise4fun.com/Alive/WA2 Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=35479 | PR35479 ]]. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70053
244 lines
6.4 KiB
LLVM
244 lines
6.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
|
|
|
|
; i32: tests `(X urem 3) == 1`.
; The expected AArch64 output contains no division. It builds the multiplier
; 0xAAAAAAAB (mov #43691 / movk #43690, lsl #16), does a multiply-add (madd)
; and finishes with an unsigned "lower than" compare (cset lo).
; In this case w9 (1431655765) serves as both the madd addend and the compare
; bound, so it is materialized only once.
define i1 @t32_3_1(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_3_1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: mov w9, #1431655765
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by a constant, compared for equality
; against a non-zero constant.
%urem = urem i32 %X, 3
|
|
%cmp = icmp eq i32 %urem, 1
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 3) == 2`.
; Same multiplier as the `== 1` case (0xAAAAAAAB), but the madd addend
; (-1431655766) and the compare bound (1431655765) differ, so w9 is loaded
; twice: once before the madd and once before the cmp.
define i1 @t32_3_2(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_3_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: mov w9, #-1431655766
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #1431655765
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 3 compared for equality with 2.
%urem = urem i32 %X, 3
|
|
%cmp = icmp eq i32 %urem, 2
|
|
ret i1 %cmp
|
|
}
|
|
|
|
|
|
; i32: tests `(X urem 5) == 1`.
; The expected output multiplies by 0xCCCCCCCD (mov #52429 / movk #52428,
; lsl #16) with a madd and then does an unsigned "lower than" compare.
; w9 (858993459) is used both as the madd addend and as the compare bound.
define i1 @t32_5_1(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_5_1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #52429
|
|
; CHECK-NEXT: movk w8, #52428, lsl #16
|
|
; CHECK-NEXT: mov w9, #858993459
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 5 compared for equality with 1.
%urem = urem i32 %X, 5
|
|
%cmp = icmp eq i32 %urem, 1
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 5) == 2`.
; Multiplier is 0xCCCCCCCD as in the other divisor-5 cases; the madd addend is
; 1717986918 and the compare bound is 858993459, so w9 is reloaded before the
; cmp.
define i1 @t32_5_2(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_5_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #52429
|
|
; CHECK-NEXT: movk w8, #52428, lsl #16
|
|
; CHECK-NEXT: mov w9, #1717986918
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #858993459
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 5 compared for equality with 2.
%urem = urem i32 %X, 5
|
|
%cmp = icmp eq i32 %urem, 2
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 5) == 3`.
; Multiplier is 0xCCCCCCCD as in the other divisor-5 cases; the madd addend is
; -1717986919 and the compare bound is 858993459, so w9 is reloaded before the
; cmp.
define i1 @t32_5_3(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_5_3:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #52429
|
|
; CHECK-NEXT: movk w8, #52428, lsl #16
|
|
; CHECK-NEXT: mov w9, #-1717986919
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #858993459
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 5 compared for equality with 3.
%urem = urem i32 %X, 5
|
|
%cmp = icmp eq i32 %urem, 3
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 5) == 4`, the largest possible remainder for divisor 5.
; Multiplier is 0xCCCCCCCD as in the other divisor-5 cases; the madd addend is
; -858993460 and the compare bound is 858993459, so w9 is reloaded before the
; cmp.
define i1 @t32_5_4(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_5_4:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #52429
|
|
; CHECK-NEXT: movk w8, #52428, lsl #16
|
|
; CHECK-NEXT: mov w9, #-858993460
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #858993459
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 5 compared for equality with 4.
%urem = urem i32 %X, 5
|
|
%cmp = icmp eq i32 %urem, 4
|
|
ret i1 %cmp
|
|
}
|
|
|
|
|
|
; i32: tests `(X urem 6) == 1` (even divisor).
; Compared with the odd-divisor tests, the expected output has one extra step:
; the madd result is rotated right by 1 (ror #1) before the unsigned compare.
; The multiplier is 0xAAAAAAAB, the madd addend is 1431655765 and the compare
; bound is 0x2AAAAAAB (mov #43691 / movk #10922, lsl #16).
define i1 @t32_6_1(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_6_1:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: mov w9, #1431655765
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #43691
|
|
; CHECK-NEXT: ror w8, w8, #1
|
|
; CHECK-NEXT: movk w9, #10922, lsl #16
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 6 compared for equality with 1.
%urem = urem i32 %X, 6
|
|
%cmp = icmp eq i32 %urem, 1
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 6) == 2` (even divisor).
; Expected output: madd with multiplier 0xAAAAAAAB and addend -1431655766,
; rotate right by 1 (ror #1), then an unsigned "lower than" compare against
; 0x2AAAAAAB (mov #43691 / movk #10922, lsl #16).
define i1 @t32_6_2(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_6_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: mov w9, #-1431655766
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #43691
|
|
; CHECK-NEXT: ror w8, w8, #1
|
|
; CHECK-NEXT: movk w9, #10922, lsl #16
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 6 compared for equality with 2.
%urem = urem i32 %X, 6
|
|
%cmp = icmp eq i32 %urem, 2
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 6) == 3` (even divisor).
; Unlike the neighbouring divisor-6 tests, the expected output does not use a
; madd here: it is a plain mul by 0xAAAAAAAB followed by `sub ..., #1`, since
; the value to add fits an immediate. The result is then rotated right by 1
; and compared (unsigned "lower than") against 0x2AAAAAAB.
define i1 @t32_6_3(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_6_3:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: mul w8, w0, w8
|
|
; CHECK-NEXT: sub w8, w8, #1 // =1
|
|
; CHECK-NEXT: mov w9, #43691
|
|
; CHECK-NEXT: ror w8, w8, #1
|
|
; CHECK-NEXT: movk w9, #10922, lsl #16
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 6 compared for equality with 3.
%urem = urem i32 %X, 6
|
|
%cmp = icmp eq i32 %urem, 3
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 6) == 4` (even divisor).
; Expected output: madd with multiplier 0xAAAAAAAB and addend 0x55555554
; (mov #21844 / movk #21845, lsl #16), rotate right by 1, then an unsigned
; "lower than" compare.
; Note the compare bound is 0x2AAAAAAA (low half #43690), one less than the
; 0x2AAAAAAB used by t32_6_1..t32_6_3; 4 is above (2^32 - 1) urem 6 == 3.
define i1 @t32_6_4(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_6_4:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: mov w9, #21844
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: movk w9, #21845, lsl #16
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #43690
|
|
; CHECK-NEXT: ror w8, w8, #1
|
|
; CHECK-NEXT: movk w9, #10922, lsl #16
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 6 compared for equality with 4.
%urem = urem i32 %X, 6
|
|
%cmp = icmp eq i32 %urem, 4
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i32: tests `(X urem 6) == 5`, the largest possible remainder for divisor 6.
; Expected output: madd with multiplier 0xAAAAAAAB and addend 0xAAAAAAA9
; (mov #43689 / movk #43690, lsl #16), rotate right by 1, then an unsigned
; "lower than" compare.
; As in t32_6_4, the compare bound is 0x2AAAAAAA (low half #43690) rather than
; the 0x2AAAAAAB used by t32_6_1..t32_6_3.
define i1 @t32_6_5(i32 %X) nounwind {
|
|
; CHECK-LABEL: t32_6_5:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w8, #43691
|
|
; CHECK-NEXT: mov w9, #43689
|
|
; CHECK-NEXT: movk w8, #43690, lsl #16
|
|
; CHECK-NEXT: movk w9, #43690, lsl #16
|
|
; CHECK-NEXT: madd w8, w0, w8, w9
|
|
; CHECK-NEXT: mov w9, #43690
|
|
; CHECK-NEXT: ror w8, w8, #1
|
|
; CHECK-NEXT: movk w9, #10922, lsl #16
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: unsigned remainder by 6 compared for equality with 5.
%urem = urem i32 %X, 6
|
|
%cmp = icmp eq i32 %urem, 5
|
|
ret i1 %cmp
|
|
}
|
|
|
|
;-------------------------------------------------------------------------------
|
|
; Other widths.
|
|
|
|
; i16: tests `(X urem 3) == 2` on a 16-bit value.
; The expected output first masks the argument to its low 16 bits
; (and w8, w0, #0xffff) and then uses the same 32-bit multiplier, addend and
; compare bound as the i32 test t32_3_2.
define i1 @t16_3_2(i16 %X) nounwind {
|
|
; CHECK-LABEL: t16_3_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w9, #43691
|
|
; CHECK-NEXT: and w8, w0, #0xffff
|
|
; CHECK-NEXT: movk w9, #43690, lsl #16
|
|
; CHECK-NEXT: mov w10, #-1431655766
|
|
; CHECK-NEXT: madd w8, w8, w9, w10
|
|
; CHECK-NEXT: mov w9, #1431655765
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: i16 unsigned remainder by 3 compared for equality with 2.
%urem = urem i16 %X, 3
|
|
%cmp = icmp eq i16 %urem, 2
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i8: tests `(X urem 3) == 2` on an 8-bit value.
; The expected output first masks the argument to its low 8 bits
; (and w8, w0, #0xff) and then uses the same 32-bit multiplier, addend and
; compare bound as the i32 test t32_3_2.
define i1 @t8_3_2(i8 %X) nounwind {
|
|
; CHECK-LABEL: t8_3_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov w9, #43691
|
|
; CHECK-NEXT: and w8, w0, #0xff
|
|
; CHECK-NEXT: movk w9, #43690, lsl #16
|
|
; CHECK-NEXT: mov w10, #-1431655766
|
|
; CHECK-NEXT: madd w8, w8, w9, w10
|
|
; CHECK-NEXT: mov w9, #1431655765
|
|
; CHECK-NEXT: cmp w8, w9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: i8 unsigned remainder by 3 compared for equality with 2.
%urem = urem i8 %X, 3
|
|
%cmp = icmp eq i8 %urem, 2
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; i64: tests `(X urem 3) == 2` on a 64-bit value.
; Same shape as the i32 test but on x registers: the multiplier is built as
; -6148914691236517206 with its low 16 bits replaced by #43691 (movk), the
; madd addend is -6148914691236517206 and the unsigned "lower than" compare
; bound is 6148914691236517205. The i1 result is still produced in w0.
define i1 @t64_3_2(i64 %X) nounwind {
|
|
; CHECK-LABEL: t64_3_2:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov x8, #-6148914691236517206
|
|
; CHECK-NEXT: movk x8, #43691
|
|
; CHECK-NEXT: mov x9, #-6148914691236517206
|
|
; CHECK-NEXT: madd x8, x0, x8, x9
|
|
; CHECK-NEXT: mov x9, #6148914691236517205
|
|
; CHECK-NEXT: cmp x8, x9
|
|
; CHECK-NEXT: cset w0, lo
|
|
; CHECK-NEXT: ret
|
|
; IR under test: i64 unsigned remainder by 3 compared for equality with 2.
%urem = urem i64 %X, 3
|
|
%cmp = icmp eq i64 %urem, 2
|
|
ret i1 %cmp
|
|
}
|