mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
[Instcombiner]Improve emission of logical or/and reductions.
For logical or/and reductions we currently emit regular @llvm.vector.reduce.or/and.vxi1 intrinsic calls. These intrinsics are not efficient for logical or/and reductions, especially if the optimizer is able to emit short-circuit versions of the scalar or/and instructions, in which case the vector code ends up less efficient than the scalar version. Instead, the or reduction for i1 can be represented as: ``` %val = bitcast <ReduxWidth x i1> to iReduxWidth %res = icmp ne iReduxWidth %val, 0 ``` and the and reduction for i1 can be represented as: ``` %val = bitcast <ReduxWidth x i1> to iReduxWidth %res = icmp eq iReduxWidth %val, 11111 ``` (where 11111 stands for the all-bits-set value, i.e. -1). This improves the performance of the vector code significantly and makes it outperform the short-circuit scalar code. Part of D57059. Differential Revision: https://reviews.llvm.org/D97406
This commit is contained in:
parent
5ffeb9384e
commit
8ef51c94ae
@ -1799,6 +1799,34 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Intrinsic::vector_reduce_or:
|
||||
case Intrinsic::vector_reduce_and: {
|
||||
// Canonicalize logical or/and reductions:
|
||||
// Or reduction for i1 is represented as:
|
||||
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
|
||||
// %res = cmp ne iReduxWidth %val, 0
|
||||
// And reduction for i1 is represented as:
|
||||
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
|
||||
// %res = cmp eq iReduxWidth %val, 11111
|
||||
Value *Arg = II->getArgOperand(0);
|
||||
Type *RetTy = II->getType();
|
||||
if (RetTy == Builder.getInt1Ty())
|
||||
if (auto *FVTy = dyn_cast<FixedVectorType>(Arg->getType())) {
|
||||
Value *Res = Builder.CreateBitCast(
|
||||
Arg, Builder.getIntNTy(FVTy->getNumElements()));
|
||||
if (IID == Intrinsic::vector_reduce_and) {
|
||||
Res = Builder.CreateICmpEQ(
|
||||
Res, ConstantInt::getAllOnesValue(Res->getType()));
|
||||
} else {
|
||||
assert(IID == Intrinsic::vector_reduce_or &&
|
||||
"Expected or reduction.");
|
||||
Res = Builder.CreateIsNotNull(Res);
|
||||
}
|
||||
replaceInstUsesWith(CI, Res);
|
||||
return eraseInstFromFunction(CI);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Handle target specific intrinsics
|
||||
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
|
||||
|
@ -3,8 +3,9 @@
|
||||
|
||||
define i1 @reduction_logical_or(<4 x i1> %x) {
|
||||
; CHECK-LABEL: @reduction_logical_or(
|
||||
; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[X:%.*]])
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[TMP2]]
|
||||
;
|
||||
%r = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)
|
||||
ret i1 %r
|
||||
@ -12,8 +13,9 @@ define i1 @reduction_logical_or(<4 x i1> %x) {
|
||||
|
||||
define i1 @reduction_logical_and(<4 x i1> %x) {
|
||||
; CHECK-LABEL: @reduction_logical_and(
|
||||
; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[X:%.*]])
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
|
||||
; CHECK-NEXT: ret i1 [[TMP2]]
|
||||
;
|
||||
%r = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)
|
||||
ret i1 %r
|
||||
|
Loading…
Reference in New Issue
Block a user