mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
9e97862e1f
Summary:
The current lowering is:
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n3, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/2xC
https://rise4fun.com/Alive/jpb5

However, we can support non-tautological cases `C1 u> C2` too.
Said handling consists of two parts:
* `C2 u<= (-1 %u C1)`. It just works. We only have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4 iff C2 u<= (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u<= (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/m4P
https://rise4fun.com/Alive/SKrx
* `C2 u> (-1 %u C1)`. We also have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`, and we have to decrement C4:
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4 iff C2 u> (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u> (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)-1
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/d40
https://rise4fun.com/Alive/8cF

I believe this concludes `x u% C1 ==/!= C2` lowering.
In fact, clang may now be better in this regard than gcc: as can be seen from the `@t32_6_4` test, we do lower `x % 6 == 4` via this pattern, while gcc does not: https://godbolt.org/z/XNU2z9
And all the general alive proofs say this is legal. And manual checking agrees: https://rise4fun.com/Alive/WA2

Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=35479 | PR35479 ]].

Reviewers: RKSimon, craig.topper, spatel
Reviewed By: RKSimon
Subscribers: nick, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70053
325 lines
9.6 KiB
LLVM
325 lines
9.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
|
|
|
|
; i32 (X u% 3) == 1, expected to be lowered without a division.
; 3 is odd, so no rotate is expected. Both targets multiply by 0xAAAAAAAB
; (3 * 0xAAAAAAAB == 1 mod 2^32), add -1 * 0xAAAAAAAB == 0x55555555 to fold in
; the subtraction of the remainder, and test the result u< 0x55555555.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_3_1(i32 %X) nounwind {
; X86-LABEL: t32_3_1:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_3_1:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 3
  %cmp = icmp eq i32 %urem, 1
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 3) == 2, expected to be lowered without a division.
; Both targets multiply by 0xAAAAAAAB (inverse of 3 mod 2^32), add
; -2 * 0xAAAAAAAB == 0xAAAAAAAA for the folded "X - 2", and test the result
; u< 0x55555555. No rotate is expected because 3 is odd.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_3_2(i32 %X) nounwind {
; X86-LABEL: t32_3_2:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_3_2:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 3
  %cmp = icmp eq i32 %urem, 2
  ret i1 %cmp
}
|
|
|
|
|
|
; i32 (X u% 5) == 1, expected to be lowered without a division.
; Both targets multiply by 0xCCCCCCCD (5 * 0xCCCCCCCD == 1 mod 2^32), add
; -1 * 0xCCCCCCCD == 0x33333333 for the folded "X - 1", and test the result
; u< 0x33333333. No rotate is expected because 5 is odd.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_5_1(i32 %X) nounwind {
; X86-LABEL: t32_5_1:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $858993459, %eax # imm = 0x33333333
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_1:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $858993459, %eax # imm = 0x33333333
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 5
  %cmp = icmp eq i32 %urem, 1
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 5) == 2, expected to be lowered without a division.
; Both targets multiply by 0xCCCCCCCD (inverse of 5 mod 2^32), add
; -2 * 0xCCCCCCCD == 0x66666666 for the folded "X - 2", and test the result
; u< 0x33333333. No rotate is expected because 5 is odd.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_5_2(i32 %X) nounwind {
; X86-LABEL: t32_5_2:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $1717986918, %eax # imm = 0x66666666
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_2:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $1717986918, %eax # imm = 0x66666666
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 5
  %cmp = icmp eq i32 %urem, 2
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 5) == 3, expected to be lowered without a division.
; Both targets multiply by 0xCCCCCCCD (inverse of 5 mod 2^32), add
; -3 * 0xCCCCCCCD == 0x99999999 for the folded "X - 3", and test the result
; u< 0x33333333. No rotate is expected because 5 is odd.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_5_3(i32 %X) nounwind {
; X86-LABEL: t32_5_3:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $-1717986919, %eax # imm = 0x99999999
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_3:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $-1717986919, %eax # imm = 0x99999999
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 5
  %cmp = icmp eq i32 %urem, 3
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 5) == 4, expected to be lowered without a division.
; Both targets multiply by 0xCCCCCCCD (inverse of 5 mod 2^32), add
; -4 * 0xCCCCCCCD == 0xCCCCCCCC for the folded "X - 4", and test the result
; u< 0x33333333. No rotate is expected because 5 is odd.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_5_4(i32 %X) nounwind {
; X86-LABEL: t32_5_4:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_4:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 5
  %cmp = icmp eq i32 %urem, 4
  ret i1 %cmp
}
|
|
|
|
|
|
; i32 (X u% 6) == 1, even divisor, expected to be lowered without a division.
; 6 == 2 * 3, so both targets multiply by 0xAAAAAAAB (inverse of the odd
; factor 3 mod 2^32), add -1 * 0xAAAAAAAB == 0x55555555 for the folded "X - 1",
; rotate right by one bit, and test the result u< 0x2AAAAAAB.
; 0xFFFFFFFF u% 6 == 3 and the remainder here is not above it, so the bound is
; the undecremented 0xFFFFFFFF /u 6 (tested as u< bound + 1).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_6_1(i32 %X) nounwind {
; X86-LABEL: t32_6_1:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_1:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 6
  %cmp = icmp eq i32 %urem, 1
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 6) == 2, even divisor, expected to be lowered without a division.
; 6 == 2 * 3, so both targets multiply by 0xAAAAAAAB (inverse of the odd
; factor 3 mod 2^32), add -2 * 0xAAAAAAAB == 0xAAAAAAAA for the folded "X - 2",
; rotate right by one bit, and test the result u< 0x2AAAAAAB.
; 0xFFFFFFFF u% 6 == 3 and the remainder here is not above it, so the bound is
; the undecremented 0xFFFFFFFF /u 6 (tested as u< bound + 1).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_6_2(i32 %X) nounwind {
; X86-LABEL: t32_6_2:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_2:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 6
  %cmp = icmp eq i32 %urem, 2
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 6) == 3, even divisor, expected to be lowered without a division.
; 6 == 2 * 3, so both targets multiply by 0xAAAAAAAB (inverse of the odd
; factor 3 mod 2^32). The folded offset -3 * 0xAAAAAAAB is -1 mod 2^32, which
; is why a plain decrement is expected instead of an add. The value is then
; rotated right by one bit and tested u< 0x2AAAAAAB.
; 0xFFFFFFFF u% 6 == 3 and the remainder here is not above it, so the bound is
; the undecremented 0xFFFFFFFF /u 6 (tested as u< bound + 1).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_6_3(i32 %X) nounwind {
; X86-LABEL: t32_6_3:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: decl %eax
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_3:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: decl %eax
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 6
  %cmp = icmp eq i32 %urem, 3
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 6) == 4, even divisor, expected to be lowered without a division.
; 6 == 2 * 3, so both targets multiply by 0xAAAAAAAB (inverse of the odd
; factor 3 mod 2^32), add -4 * 0xAAAAAAAB == 0x55555554 for the folded "X - 4",
; rotate right by one bit, and test the result u< 0x2AAAAAAA.
; 0xFFFFFFFF u% 6 == 3 and the remainder here is above it, so the bound is
; 0xFFFFFFFF /u 6 decremented by one (tested as u< 0xFFFFFFFF /u 6).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_6_4(i32 %X) nounwind {
; X86-LABEL: t32_6_4:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655764, %eax # imm = 0x55555554
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_4:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655764, %eax # imm = 0x55555554
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 6
  %cmp = icmp eq i32 %urem, 4
  ret i1 %cmp
}
|
|
|
|
; i32 (X u% 6) == 5, even divisor, expected to be lowered without a division.
; 6 == 2 * 3, so both targets multiply by 0xAAAAAAAB (inverse of the odd
; factor 3 mod 2^32), add -5 * 0xAAAAAAAB == 0xAAAAAAA9 for the folded "X - 5",
; rotate right by one bit, and test the result u< 0x2AAAAAAA.
; 0xFFFFFFFF u% 6 == 3 and the remainder here is above it, so the bound is
; 0xFFFFFFFF /u 6 decremented by one (tested as u< 0xFFFFFFFF /u 6).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t32_6_5(i32 %X) nounwind {
; X86-LABEL: t32_6_5:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_5:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i32 %X, 6
  %cmp = icmp eq i32 %urem, 5
  ret i1 %cmp
}
|
|
|
|
;-------------------------------------------------------------------------------
; Other widths.
|
|
|
|
; i16 (X u% 3) == 2, expected to be lowered without a division.
; Both targets do the arithmetic in 32-bit registers: multiply by 0xAAAB
; (3 * 0xAAAB == 1 mod 2^16), add 0xAAAA (-2 * 0xAAAB mod 2^16), zero-extend
; the low 16 bits, and test the result u< 0x5555.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t16_3_2(i16 %X) nounwind {
; X86-LABEL: t16_3_2:
; X86: # %bb.0:
; X86-NEXT: imull $-21845, {{[0-9]+}}(%esp), %eax # imm = 0xAAAB
; X86-NEXT: addl $-21846, %eax # imm = 0xAAAA
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: cmpl $21845, %eax # imm = 0x5555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t16_3_2:
; X64: # %bb.0:
; X64-NEXT: imull $-21845, %edi, %eax # imm = 0xAAAB
; X64-NEXT: addl $-21846, %eax # imm = 0xAAAA
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: cmpl $21845, %eax # imm = 0x5555
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i16 %X, 3
  %cmp = icmp eq i16 %urem, 2
  ret i1 %cmp
}
|
|
|
|
; i8 (X u% 3) == 2, expected to be lowered without a division.
; Both targets multiply by -85 (0xAB as an 8-bit value; 3 * 0xAB == 1 mod 2^8),
; add -86 (0xAA == -2 * 0xAB mod 2^8) in the low byte, and test that byte
; u< 85 (0x55).
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t8_3_2(i8 %X) nounwind {
; X86-LABEL: t8_3_2:
; X86: # %bb.0:
; X86-NEXT: imull $-85, {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb $-86, %al
; X86-NEXT: cmpb $85, %al
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t8_3_2:
; X64: # %bb.0:
; X64-NEXT: imull $-85, %edi, %eax
; X64-NEXT: addb $-86, %al
; X64-NEXT: cmpb $85, %al
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i8 %X, 3
  %cmp = icmp eq i8 %urem, 2
  ret i1 %cmp
}
|
|
|
|
; i64 (X u% 3) == 2; the two targets are expected to differ.
; On the i686 run the checks show no multiply-based form: the remainder comes
; from a call to __umoddi3 and the 64-bit result (edx:eax) is compared with 2
; via xor/or/sete.
; On the x86_64 run the checks show the division-free form: multiply by
; 0xAAAAAAAAAAAAAAAB (inverse of 3 mod 2^64), add 0xAAAAAAAAAAAAAAAA
; (-2 * inverse mod 2^64), and test the result u< 0x5555555555555555, with all
; three 64-bit constants loaded through movabsq.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
define i1 @t64_3_2(i64 %X) nounwind {
; X86-LABEL: t64_3_2:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $0
; X86-NEXT: pushl $3
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __umoddi3
; X86-NEXT: addl $16, %esp
; X86-NEXT: xorl $2, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: sete %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: t64_3_2:
; X64: # %bb.0:
; X64-NEXT: movabsq $-6148914691236517205, %rax # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: imulq %rdi, %rax
; X64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: addq %rax, %rcx
; X64-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; X64-NEXT: cmpq %rax, %rcx
; X64-NEXT: setb %al
; X64-NEXT: retq
  %urem = urem i64 %X, 3
  %cmp = icmp eq i64 %urem, 2
  ret i1 %cmp
}
|