1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/X86/urem-seteq-nonzero.ll
Roman Lebedev 9e97862e1f [Codegen] TargetLowering::prepareUREMEqFold(): x u% C1 ==/!= C2 (PR35479)
Summary:
The current lowering is:
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n3, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/2xC
https://rise4fun.com/Alive/jpb5

However, we can support non-tautological cases `C1 u> C2` too.
Said handling consists of two parts:
* `C2 u<= (-1 %u C1)`. It just works. We only have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4   iff C2 u<= (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u<= (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/m4P
https://rise4fun.com/Alive/SKrx
* `C2 u> (-1 %u C1)`. We also have to change `(X % C1) == C2` into `((X - C2) % C1) == 0`,
  and we have to decrement C4:
```
Name: (X % C1) == C2 -> (X - C2) * C3 <= C4   iff C2 u> (-1 %u C1)
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 && C2 u> (-1 %u C1)
%zz = and i8 C3, 0 ; trick alive into making C3 available in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
  =>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = (-1 /u C1)-1
%n0 = sub i8 %x, C2
%n1 = mul i8 %n0, C3
%n2 = lshr i8 %n1, countTrailingZeros(C1) ; rotate right
%n3 = shl i8 %n1, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n4 = or i8 %n2, %n3 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n4, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/d40
https://rise4fun.com/Alive/8cF

I believe this concludes `x u% C1 ==/!= C2` lowering.
In fact, clang may now be better in this regard than gcc:
as can be seen from the `@t32_6_4` test, we do lower `x % 6 == 4`
via this pattern, while gcc does not: https://godbolt.org/z/XNU2z9
And all the general alive proofs say this is legal.
And manual checking agrees: https://rise4fun.com/Alive/WA2

Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=35479 | PR35479 ]].

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: nick, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70053
2019-11-22 15:22:42 +03:00

325 lines
9.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
; i32: (X u% 3) == 1.
; The expected output on both targets contains no division: X is multiplied
; by 0xAAAAAAAB, 0x55555555 is added, and the sum is tested with an unsigned
; "below" comparison against 0x55555555 (cmpl followed by setb).
define i1 @t32_3_1(i32 %X) nounwind {
; X86-LABEL: t32_3_1:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_3_1:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by the constant 3, compared for equality
; with the non-zero constant 1.
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 1
ret i1 %cmp
}
; i32: (X u% 3) == 2.
; Same expected shape as the remainder-1 case for divisor 3 (multiply by
; 0xAAAAAAAB, add a constant, unsigned "below" compare against 0x55555555);
; only the added constant differs, here 0xAAAAAAAA.
define i1 @t32_3_2(i32 %X) nounwind {
; X86-LABEL: t32_3_2:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_3_2:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 3 compared for equality with 2.
%urem = urem i32 %X, 3
%cmp = icmp eq i32 %urem, 2
ret i1 %cmp
}
; i32: (X u% 5) == 1.
; Expected output on both targets: multiply by 0xCCCCCCCD, add 0x33333333,
; then an unsigned "below" compare against 0x33333333 (cmpl + setb).
; No division instruction is expected.
define i1 @t32_5_1(i32 %X) nounwind {
; X86-LABEL: t32_5_1:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $858993459, %eax # imm = 0x33333333
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_1:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $858993459, %eax # imm = 0x33333333
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 5 compared for equality with 1.
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 1
ret i1 %cmp
}
; i32: (X u% 5) == 2.
; Expected output on both targets: multiply by 0xCCCCCCCD, add 0x66666666,
; then an unsigned "below" compare against 0x33333333 (cmpl + setb).
define i1 @t32_5_2(i32 %X) nounwind {
; X86-LABEL: t32_5_2:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $1717986918, %eax # imm = 0x66666666
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_2:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $1717986918, %eax # imm = 0x66666666
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 5 compared for equality with 2.
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 2
ret i1 %cmp
}
; i32: (X u% 5) == 3.
; Expected output on both targets: multiply by 0xCCCCCCCD, add 0x99999999,
; then an unsigned "below" compare against 0x33333333 (cmpl + setb).
define i1 @t32_5_3(i32 %X) nounwind {
; X86-LABEL: t32_5_3:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $-1717986919, %eax # imm = 0x99999999
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_3:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $-1717986919, %eax # imm = 0x99999999
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 5 compared for equality with 3.
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 3
ret i1 %cmp
}
; i32: (X u% 5) == 4, the largest remainder possible for divisor 5.
; Expected output on both targets: multiply by 0xCCCCCCCD, add 0xCCCCCCCC,
; then an unsigned "below" compare against 0x33333333 (cmpl + setb).
define i1 @t32_5_4(i32 %X) nounwind {
; X86-LABEL: t32_5_4:
; X86: # %bb.0:
; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %eax # imm = 0xCCCCCCCD
; X86-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_5_4:
; X64: # %bb.0:
; X64-NEXT: imull $-858993459, %edi, %eax # imm = 0xCCCCCCCD
; X64-NEXT: addl $-858993460, %eax # imm = 0xCCCCCCCC
; X64-NEXT: cmpl $858993459, %eax # imm = 0x33333333
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 5 compared for equality with 4.
%urem = urem i32 %X, 5
%cmp = icmp eq i32 %urem, 4
ret i1 %cmp
}
; i32: (X u% 6) == 1. The divisor is even, so the expected sequence contains
; an extra rotate-right by one bit (rorl) between the add and the compare.
; Expected output on both targets: multiply by 0xAAAAAAAB, add 0x55555555,
; rotate right by 1, then an unsigned "below" compare against 0x2AAAAAAB.
define i1 @t32_6_1(i32 %X) nounwind {
; X86-LABEL: t32_6_1:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_1:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 6 compared for equality with 1.
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 1
ret i1 %cmp
}
; i32: (X u% 6) == 2.
; Expected output on both targets: multiply by 0xAAAAAAAB, add 0xAAAAAAAA,
; rotate right by 1 (rorl), then an unsigned "below" compare against
; 0x2AAAAAAB (cmpl + setb).
define i1 @t32_6_2(i32 %X) nounwind {
; X86-LABEL: t32_6_2:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_2:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 6 compared for equality with 2.
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 2
ret i1 %cmp
}
; i32: (X u% 6) == 3.
; Expected output on both targets: multiply by 0xAAAAAAAB, decrement by one
; (a decl appears here instead of an addl with an immediate), rotate right
; by 1 (rorl), then an unsigned "below" compare against 0x2AAAAAAB.
define i1 @t32_6_3(i32 %X) nounwind {
; X86-LABEL: t32_6_3:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: decl %eax
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_3:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: decl %eax
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827883, %eax # imm = 0x2AAAAAAB
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 6 compared for equality with 3.
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 3
ret i1 %cmp
}
; i32: (X u% 6) == 4.
; Expected output on both targets: multiply by 0xAAAAAAAB, add 0x55555554,
; rotate right by 1 (rorl), then an unsigned "below" compare against
; 0x2AAAAAAA. Note the compare bound is one lower than the 0x2AAAAAAB used
; for remainders 1, 2 and 3 with the same divisor.
define i1 @t32_6_4(i32 %X) nounwind {
; X86-LABEL: t32_6_4:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $1431655764, %eax # imm = 0x55555554
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_4:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $1431655764, %eax # imm = 0x55555554
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 6 compared for equality with 4.
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 4
ret i1 %cmp
}
; i32: (X u% 6) == 5, the largest remainder possible for divisor 6.
; Expected output on both targets: multiply by 0xAAAAAAAB, add 0xAAAAAAA9,
; rotate right by 1 (rorl), then an unsigned "below" compare against
; 0x2AAAAAAA (the same lowered bound as the remainder-4 case).
define i1 @t32_6_5(i32 %X) nounwind {
; X86-LABEL: t32_6_5:
; X86: # %bb.0:
; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %eax # imm = 0xAAAAAAAB
; X86-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
; X86-NEXT: rorl %eax
; X86-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t32_6_5:
; X64: # %bb.0:
; X64-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT: addl $-1431655767, %eax # imm = 0xAAAAAAA9
; X64-NEXT: rorl %eax
; X64-NEXT: cmpl $715827882, %eax # imm = 0x2AAAAAAA
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: unsigned remainder by 6 compared for equality with 5.
%urem = urem i32 %X, 6
%cmp = icmp eq i32 %urem, 5
ret i1 %cmp
}
;-------------------------------------------------------------------------------
; Other widths.
; i16: (X u% 3) == 2.
; Expected output on both targets: the multiply and add are done with 32-bit
; instructions (imull by 0xAAAB, addl of 0xAAAA), the low 16 bits are then
; zero-extended with movzwl, and the result is tested with an unsigned
; "below" compare against 0x5555 (cmpl + setb).
define i1 @t16_3_2(i16 %X) nounwind {
; X86-LABEL: t16_3_2:
; X86: # %bb.0:
; X86-NEXT: imull $-21845, {{[0-9]+}}(%esp), %eax # imm = 0xAAAB
; X86-NEXT: addl $-21846, %eax # imm = 0xAAAA
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: cmpl $21845, %eax # imm = 0x5555
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t16_3_2:
; X64: # %bb.0:
; X64-NEXT: imull $-21845, %edi, %eax # imm = 0xAAAB
; X64-NEXT: addl $-21846, %eax # imm = 0xAAAA
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: cmpl $21845, %eax # imm = 0x5555
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: 16-bit unsigned remainder by 3 compared for equality with 2.
%urem = urem i16 %X, 3
%cmp = icmp eq i16 %urem, 2
ret i1 %cmp
}
; i8: (X u% 3) == 2.
; Expected output on both targets: a 32-bit imull by -85, followed by 8-bit
; operations on %al: addb of -86 and an unsigned "below" compare against 85
; (cmpb + setb).
define i1 @t8_3_2(i8 %X) nounwind {
; X86-LABEL: t8_3_2:
; X86: # %bb.0:
; X86-NEXT: imull $-85, {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb $-86, %al
; X86-NEXT: cmpb $85, %al
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: t8_3_2:
; X64: # %bb.0:
; X64-NEXT: imull $-85, %edi, %eax
; X64-NEXT: addb $-86, %al
; X64-NEXT: cmpb $85, %al
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: 8-bit unsigned remainder by 3 compared for equality with 2.
%urem = urem i8 %X, 3
%cmp = icmp eq i8 %urem, 2
ret i1 %cmp
}
; i64: (X u% 3) == 2. The two targets are expected to differ here.
; * i686: the expected output still performs a real remainder by calling
;   __umoddi3 with the divisor pushed as the 64-bit value 3, then checks the
;   64-bit result against 2 (xorl $2 on the low half, orl with the high
;   half, sete).
; * x86-64: the expected output is the multiply / add / unsigned-compare
;   form, with the three 64-bit constants materialised via movabsq.
define i1 @t64_3_2(i64 %X) nounwind {
; X86-LABEL: t64_3_2:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $0
; X86-NEXT: pushl $3
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __umoddi3
; X86-NEXT: addl $16, %esp
; X86-NEXT: xorl $2, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: sete %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: t64_3_2:
; X64: # %bb.0:
; X64-NEXT: movabsq $-6148914691236517205, %rax # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: imulq %rdi, %rax
; X64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: addq %rax, %rcx
; X64-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
; X64-NEXT: cmpq %rax, %rcx
; X64-NEXT: setb %al
; X64-NEXT: retq
; Input pattern: 64-bit unsigned remainder by 3 compared for equality with 2.
%urem = urem i64 %X, 3
%cmp = icmp eq i64 %urem, 2
ret i1 %cmp
}