1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 20:43:44 +02:00
llvm-mirror/test/CodeGen/X86/parity.ll
Craig Topper 744780ede5 [X86] Add a DAG combine for the __builtin_parity idiom used by clang to enable better codegen
Clang uses "ctpop & 1" to implement __builtin_parity. If the popcnt instruction isn't supported, this generates a large amount of code to calculate the population count. Instead, we can bisect the data down to a single byte using xor and then check the parity flag.

Even when popcnt is supported, it's still a good idea to split 64-bit data on 32-bit targets using an xor in front of a single popcnt. Otherwise we get two popcnts and an add before the and.

I've specifically targeted this at the sizes supported by clang builtins, but we could generalize this if we think that's useful.

Differential Revision: https://reviews.llvm.org/D50165

llvm-svn: 338907
2018-08-03 18:00:29 +00:00

97 lines
3.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-popcnt | FileCheck %s --check-prefix=X86-NOPOPCNT
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-popcnt | FileCheck %s --check-prefix=X64-NOPOPCNT
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
; 32-bit parity idiom: the low bit of the population count of %x.
; Without the popcnt feature the expected code xor-folds the value down to a
; single byte and reads the parity flag (setnp); with popcnt a single popcntl
; followed by a mask of bit 0 is expected.
define i32 @parity_32(i32 %x) {
; X86-NOPOPCNT-LABEL: parity_32:
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
; X86-NOPOPCNT-NEXT: shrl $16, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
; X86-NOPOPCNT-NEXT: xorb %ch, %cl
; X86-NOPOPCNT-NEXT: setnp %al
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_32:
; X64-NOPOPCNT: # %bb.0:
; X64-NOPOPCNT-NEXT: movl %edi, %ecx
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
; X64-NOPOPCNT-NEXT: xorl %edi, %ecx
; X64-NOPOPCNT-NEXT: movl %ecx, %edx
; X64-NOPOPCNT-NEXT: shrl $8, %edx
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
; X64-NOPOPCNT-NEXT: xorb %cl, %dl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_32:
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: parity_32:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
; X64-POPCNT-NEXT: andl $1, %eax
; X64-POPCNT-NEXT: retq
  %popcount = tail call i32 @llvm.ctpop.i32(i32 %x)
  %parity = and i32 %popcount, 1
  ret i32 %parity
}
; 64-bit parity idiom: the low bit of the population count of %x.
; On the 32-bit target the two halves are expected to be xor-ed together first,
; so that only one 32-bit reduction (xor-fold + setnp, or a single popcntl) is
; needed; the upper half of the i64 result is zeroed in %edx.
define i64 @parity_64(i64 %x) {
; X86-NOPOPCNT-LABEL: parity_64:
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
; X86-NOPOPCNT-NEXT: shrl $16, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
; X86-NOPOPCNT-NEXT: xorb %ch, %cl
; X86-NOPOPCNT-NEXT: setnp %al
; X86-NOPOPCNT-NEXT: xorl %edx, %edx
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_64:
; X64-NOPOPCNT: # %bb.0:
; X64-NOPOPCNT-NEXT: movq %rdi, %rax
; X64-NOPOPCNT-NEXT: shrq $32, %rax
; X64-NOPOPCNT-NEXT: xorl %edi, %eax
; X64-NOPOPCNT-NEXT: movl %eax, %ecx
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
; X64-NOPOPCNT-NEXT: xorl %eax, %ecx
; X64-NOPOPCNT-NEXT: movl %ecx, %edx
; X64-NOPOPCNT-NEXT: shrl $8, %edx
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
; X64-NOPOPCNT-NEXT: xorb %cl, %dl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_64:
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl %eax, %eax
; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: xorl %edx, %edx
; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: parity_64:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: andl $1, %eax
; X64-POPCNT-NEXT: retq
  %popcount = tail call i64 @llvm.ctpop.i64(i64 %x)
  %parity = and i64 %popcount, 1
  ret i64 %parity
}
; Population-count intrinsics called by the parity functions in this file.
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)