mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
c6a7e261b5
Clang emits (and (ctpop X), 1) for __builtin_parity. If ctpop isn't natively supported by the target, this leads to poor codegen because the expansion of ctpop is more complex than what is needed for parity. This adds a DAG combine to convert the pattern to ISD::PARITY before operation legalization. Type legalization is updated to handle expanding and promoting this operation. If, after type legalization, CTPOP is supported for this type, LegalizeDAG will turn it back into CTPOP+AND. Otherwise LegalizeDAG will emit a series of shifts and xors followed by an AND with 1. I've avoided vectors in this patch to avoid more legalization complexity. X86 previously had a custom DAG combiner for this. This is now moved to Custom lowering for the new opcode. There is a minor regression in vector-reduce-xor-bool.ll, but a follow-up patch can easily fix that. Fixes PR47433 Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D87209
163 lines
4.4 KiB
LLVM
163 lines
4.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s

; Code-generation test for the parity idiom `and (ctpop X), 1` on ARM
; (arm-eabi with +v6t2), covering several integer widths plus truncated,
; zero-extended and masked variants. The expected output in every case is a
; chain of shifted XORs followed by `and #1` rather than a full popcount.
|
|
|
|
; Parity of an i4 value, written as ctpop followed by `and 1`.
; Expected code masks the input to its low four bits (`and #15`), folds it
; with XORs shifted by 2 and 1, and keeps only bit 0.
define i4 @parity_4(i4 %x) {
; CHECK-LABEL: parity_4:
; CHECK: @ %bb.0:
; CHECK-NEXT: and r0, r0, #15
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i4 @llvm.ctpop.i4(i4 %x)
  %2 = and i4 %1, 1
  ret i4 %2
}
|
|
|
|
; Parity of an i8 value, written as ctpop followed by `and 1`.
; Expected code zero-extends the byte (`uxtb`), folds it with XORs shifted by
; 4, 2 and 1, and keeps only bit 0.
define i8 @parity_8(i8 %x) {
; CHECK-LABEL: parity_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i8 @llvm.ctpop.i8(i8 %x)
  %2 = and i8 %1, 1
  ret i8 %2
}
|
|
|
|
; Parity of an i16 value, written as ctpop followed by `and 1`.
; Expected code zero-extends the halfword (`uxth`), folds it with XORs shifted
; by 8, 4, 2 and 1, and keeps only bit 0.
define i16 @parity_16(i16 %x) {
; CHECK-LABEL: parity_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i16 @llvm.ctpop.i16(i16 %x)
  %2 = and i16 %1, 1
  ret i16 %2
}
|
|
|
|
; Parity of an i17 value (a width that is not a power of two), written as
; ctpop followed by `and 1`.
; Expected code clears bits 17..31 of the register (`bfc r0, #17, #15`), folds
; the remainder with XORs shifted by 16, 8, 4, 2 and 1, and keeps only bit 0.
define i17 @parity_17(i17 %x) {
; CHECK-LABEL: parity_17:
; CHECK: @ %bb.0:
; CHECK-NEXT: bfc r0, #17, #15
; CHECK-NEXT: eor r0, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i17 @llvm.ctpop.i17(i17 %x)
  %2 = and i17 %1, 1
  ret i17 %2
}
|
|
|
|
; Parity of an i32 value, written as ctpop followed by `and 1`.
; The input already fills the register, so no masking is expected: the code
; folds it with XORs shifted by 16, 8, 4, 2 and 1 and keeps only bit 0.
define i32 @parity_32(i32 %x) {
; CHECK-LABEL: parity_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: eor r0, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i32 @llvm.ctpop.i32(i32 %x)
  %2 = and i32 %1, 1
  ret i32 %2
}
|
|
|
|
; Parity of an i64 value, written as ctpop followed by `and 1`.
; Expected code first XORs r0 with r1 (presumably the two 32-bit halves of the
; argument), then performs the same 32-bit fold as parity_32. r1 is set to 0,
; presumably as the upper half of the i64 result.
define i64 @parity_64(i64 %x) {
; CHECK-LABEL: parity_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: eor r0, r0, r1
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: eor r0, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i64 @llvm.ctpop.i64(i64 %x)
  %2 = and i64 %1, 1
  ret i64 %2
}
|
|
|
|
; Parity of an i64 value where the ctpop result is truncated to i32 before the
; `and 1`. The truncate must not hide the parity pattern.
; Expected code matches parity_64 except that no `mov r1, #0` is emitted,
; since the result is only 32 bits wide.
define i32 @parity_64_trunc(i64 %x) {
; CHECK-LABEL: parity_64_trunc:
; CHECK: @ %bb.0:
; CHECK-NEXT: eor r0, r0, r1
; CHECK-NEXT: eor r0, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i64 @llvm.ctpop.i64(i64 %x)
  %2 = trunc i64 %1 to i32
  %3 = and i32 %2, 1
  ret i32 %3
}
|
|
|
|
; Parity of an i32 value where the ctpop result is truncated to i8 before the
; `and 1`. The truncate must not hide the parity pattern.
; Expected code is the same XOR fold (shifts 16, 8, 4, 2, 1) and final
; `and #1` as parity_32.
define i8 @parity_32_trunc(i32 %x) {
; CHECK-LABEL: parity_32_trunc:
; CHECK: @ %bb.0:
; CHECK-NEXT: eor r0, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %1 = tail call i32 @llvm.ctpop.i32(i32 %x)
  %2 = trunc i32 %1 to i8
  %3 = and i8 %2, 1
  ret i8 %3
}
|
|
|
|
; Parity of an i8 value that is zero-extended to i32 before the i32 ctpop.
; The upper 24 bits are known zero, so the expected code is the 8-bit
; sequence: `uxtb`, XOR folds shifted by 4, 2 and 1, then `and #1`. There are
; no folds by 16 or 8.
define i32 @parity_8_zext(i8 %x) {
; CHECK-LABEL: parity_8_zext:
; CHECK: @ %bb.0:
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %a = zext i8 %x to i32
  %b = tail call i32 @llvm.ctpop.i32(i32 %a)
  %c = and i32 %b, 1
  ret i32 %c
}
|
|
|
|
; Parity of the low byte of an i32: the input is masked with 255 before the
; i32 ctpop. The expected code is the same 8-bit sequence as parity_8_zext:
; `uxtb`, XOR folds shifted by 4, 2 and 1, then `and #1`.
define i32 @parity_8_mask(i32 %x) {
; CHECK-LABEL: parity_8_mask:
; CHECK: @ %bb.0:
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
  %a = and i32 %x, 255
  %b = tail call i32 @llvm.ctpop.i32(i32 %a)
  %c = and i32 %b, 1
  ret i32 %c
}
|
|
|
|
; Population-count intrinsic declarations, one per integer width used by the
; test functions in this file.
declare i4 @llvm.ctpop.i4(i4 %x)
declare i8 @llvm.ctpop.i8(i8 %x)
declare i16 @llvm.ctpop.i16(i16 %x)
declare i17 @llvm.ctpop.i17(i17 %x)
declare i32 @llvm.ctpop.i32(i32 %x)
declare i64 @llvm.ctpop.i64(i64 %x)
|