mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
8ef51c94ae
For logical or/and reductions we emit regular @llvm.vector.reduce.or/and.vxi1 intrinsic calls. These intrinsics are not efficient for logical or/and reductions, especially if the optimizer is able to emit short-circuit versions of the scalar or/and instructions, in which case the vector code becomes less efficient than the scalar version. Instead, an or reduction for i1 can be represented as:
```
%val = bitcast <ReduxWidth x i1> to iReduxWidth
%res = icmp ne iReduxWidth %val, 0
```
and an and reduction for i1 can be represented as:
```
%val = bitcast <ReduxWidth x i1> to iReduxWidth
%res = icmp eq iReduxWidth %val, 11111   ; i.e. all bits set (-1)
```
This improves the performance of the vector code significantly and makes it outperform the short-circuit scalar code. Part of D57059. Differential Revision: https://reviews.llvm.org/D97406
26 lines
847 B
LLVM
26 lines
847 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Logical OR reduction over a <4 x i1> mask.
; The assertions below expect instcombine to replace the
; @llvm.vector.reduce.or.v4i1 call with a bitcast of the mask to i4 followed
; by an "any bit set" test (icmp ne against 0).
define i1 @reduction_logical_or(<4 x i1> %x) {
; CHECK-LABEL: @reduction_logical_or(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[TMP2]]
;
  %r = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)
  ret i1 %r
}

; Logical AND reduction over a <4 x i1> mask.
; The assertions below expect instcombine to replace the
; @llvm.vector.reduce.and.v4i1 call with a bitcast of the mask to i4 followed
; by an "all bits set" test (icmp eq against -1, the all-ones i4 value).
define i1 @reduction_logical_and(<4 x i1> %x) {
; CHECK-LABEL: @reduction_logical_and(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
; CHECK-NEXT:    ret i1 [[TMP2]]
;
  %r = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)
  ret i1 %r
}

; Declarations of the i1 vector reduction intrinsics called by the test
; functions in this file.
declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)