1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/AArch64/parity.ll
Craig Topper c6a7e261b5 [SelectionDAG][X86][ARM][AArch64] Add ISD opcode for __builtin_parity. Expand it to shifts and xors.
Clang emits (and (ctpop X), 1) for __builtin_parity. If ctpop
isn't natively supported by the target, this leads to poor codegen
due to the expansion of ctpop being more complex than what is needed
for parity.

This adds a DAG combine to convert the pattern to ISD::PARITY
before operation legalization. Type legalization is updated
to handle Expanding and Promoting this operation. If after type
legalization, CTPOP is supported for this type, LegalizeDAG will
turn it back into CTPOP+AND. Otherwise LegalizeDAG will emit a
series of shifts and xors followed by an AND with 1.

I've avoided vectors in this patch to avoid additional legalization
complexity.

X86 previously had a custom DAG combiner for this. This is now
moved to Custom lowering for the new opcode. There is a minor
regression in vector-reduce-xor-bool.ll, but a follow up patch
can easily fix that.

Fixes PR47433

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D87209
2020-09-12 11:42:18 -07:00

162 lines
4.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
; i4 parity: ctpop+and-1 should lower to a mask of the low 4 bits followed
; by an xor-shift reduction (lsr #2, lsr #1) and a final AND with 1 --
; no full ctpop expansion.
define i4 @parity_4(i4 %x) {
; CHECK-LABEL: parity_4:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xf
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i4 @llvm.ctpop.i4(i4 %x)
%2 = and i4 %1, 1
ret i4 %2
}
; i8 parity: input is masked to 8 bits, then folded with three xor-shifts
; (lsr #4, #2, #1) before the final AND with 1.
define i8 @parity_8(i8 %x) {
; CHECK-LABEL: parity_8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i8 @llvm.ctpop.i8(i8 %x)
%2 = and i8 %1, 1
ret i8 %2
}
; i16 parity: masked to 16 bits, then four xor-shift steps (lsr #8..#1)
; and an AND with 1.
define i16 @parity_16(i16 %x) {
; CHECK-LABEL: parity_16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i16 @llvm.ctpop.i16(i16 %x)
%2 = and i16 %1, 1
ret i16 %2
}
; i17 parity: a non-power-of-2 type, so it exercises type promotion of the
; parity node. The value is masked to 17 bits (0x1ffff) and then reduced
; with xor-shifts in a 32-bit register.
define i17 @parity_17(i17 %x) {
; CHECK-LABEL: parity_17:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0x1ffff
; CHECK-NEXT: eor w8, w8, w8, lsr #16
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i17 @llvm.ctpop.i17(i17 %x)
%2 = and i17 %1, 1
ret i17 %2
}
; i32 parity: the full register width is used, so no leading mask is
; needed -- just the xor-shift ladder (lsr #16..#1) and AND with 1.
define i32 @parity_32(i32 %x) {
; CHECK-LABEL: parity_32:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w0, lsr #16
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i32 @llvm.ctpop.i32(i32 %x)
%2 = and i32 %1, 1
ret i32 %2
}
; i64 parity: xor-shift reduction in x registers. Note the final eor is
; emitted on w registers -- the subsequent AND keeps only bit 0, so the
; upper half is irrelevant at that point.
define i64 @parity_64(i64 %x) {
; CHECK-LABEL: parity_64:
; CHECK: // %bb.0:
; CHECK-NEXT: eor x8, x0, x0, lsr #32
; CHECK-NEXT: eor x8, x8, x8, lsr #16
; CHECK-NEXT: eor x8, x8, x8, lsr #8
; CHECK-NEXT: eor x8, x8, x8, lsr #4
; CHECK-NEXT: eor x8, x8, x8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and x0, x8, #0x1
; CHECK-NEXT: ret
%1 = tail call i64 @llvm.ctpop.i64(i64 %x)
%2 = and i64 %1, 1
ret i64 %2
}
; i64 parity observed through a trunc-to-i32: the ctpop+trunc+and pattern
; should still be recognized as parity of the full 64-bit input, with the
; result produced in w0.
define i32 @parity_64_trunc(i64 %x) {
; CHECK-LABEL: parity_64_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: eor x8, x0, x0, lsr #32
; CHECK-NEXT: eor x8, x8, x8, lsr #16
; CHECK-NEXT: eor x8, x8, x8, lsr #8
; CHECK-NEXT: eor x8, x8, x8, lsr #4
; CHECK-NEXT: eor x8, x8, x8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i64 @llvm.ctpop.i64(i64 %x)
%2 = trunc i64 %1 to i32
%3 = and i32 %2, 1
ret i32 %3
}
; i32 parity observed through a trunc-to-i8: same combine as above, with
; the whole reduction done in w registers.
define i8 @parity_32_trunc(i32 %x) {
; CHECK-LABEL: parity_32_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w0, lsr #16
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i32 @llvm.ctpop.i32(i32 %x)
%2 = trunc i32 %1 to i8
%3 = and i8 %2, 1
ret i8 %3
}
; Parity of an i8 zero-extended to i32: only 8 bits can be set, so the
; reduction starts at lsr #4 after masking with 0xff (same codegen as
; parity_8).
define i32 @parity_8_zext(i8 %x) {
; CHECK-LABEL: parity_8_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%a = zext i8 %x to i32
%b = tail call i32 @llvm.ctpop.i32(i32 %a)
%c = and i32 %b, 1
ret i32 %c
}
; Parity of an i32 pre-masked with 255: like parity_8_zext, the known
; zero bits let the xor-shift ladder start at lsr #4.
define i32 @parity_8_mask(i32 %x) {
; CHECK-LABEL: parity_8_mask:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%a = and i32 %x, 255
%b = tail call i32 @llvm.ctpop.i32(i32 %a)
%c = and i32 %b, 1
ret i32 %c
}
; Declarations of the ctpop intrinsics exercised by the tests above.
declare i4 @llvm.ctpop.i4(i4 %x)
declare i8 @llvm.ctpop.i8(i8 %x)
declare i16 @llvm.ctpop.i16(i16 %x)
declare i17 @llvm.ctpop.i17(i17 %x)
declare i32 @llvm.ctpop.i32(i32 %x)
declare i64 @llvm.ctpop.i64(i64 %x)