mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
Add a late IR expansion pass for the experimental reduction intrinsics.
This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shufflevector sequence. Differential Revision: https://reviews.llvm.org/D32245 llvm-svn: 302631
This commit is contained in:
parent
2eb1e4752a
commit
668fbd4cf5
@ -753,6 +753,9 @@ public:
|
||||
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
|
||||
ReductionFlags Flags) const;
|
||||
|
||||
/// \returns True if the target wants to expand the given reduction intrinsic
|
||||
/// into a shuffle sequence.
|
||||
bool shouldExpandReduction(const IntrinsicInst *II) const;
|
||||
/// @}
|
||||
|
||||
private:
|
||||
@ -910,6 +913,7 @@ public:
|
||||
VectorType *VecTy) const = 0;
|
||||
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
|
||||
ReductionFlags) const = 0;
|
||||
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
@ -1219,6 +1223,9 @@ public:
|
||||
ReductionFlags Flags) const override {
|
||||
return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
|
||||
}
|
||||
/// Forwards the query unchanged to the wrapped implementation object \c Impl
/// and returns its answer.
bool shouldExpandReduction(const IntrinsicInst *II) const override {
|
||||
return Impl.shouldExpandReduction(II);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
@ -462,6 +462,10 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Unconditionally answers true: every reduction intrinsic is reported as one
/// that should be expanded. \p II is not inspected.
bool shouldExpandReduction(const IntrinsicInst *II) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Obtain the minimum required size to hold the value (without the sign)
|
||||
// In case of a vector it returns the min required size for one element.
|
||||
|
24
include/llvm/CodeGen/ExpandReductions.h
Normal file
24
include/llvm/CodeGen/ExpandReductions.h
Normal file
@ -0,0 +1,24 @@
|
||||
//===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CODEGEN_EXPANDREDUCTIONS_H
|
||||
#define LLVM_CODEGEN_EXPANDREDUCTIONS_H
|
||||
|
||||
#include "llvm/IR/PassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// Pass (PassInfoMixin-based) that expands the experimental reduction
/// intrinsics.
class ExpandReductionsPass
|
||||
: public PassInfoMixin<ExpandReductionsPass> {
|
||||
public:
|
||||
/// Runs the expansion on \p F, using \p AM to look up the analyses it needs,
/// and reports which analyses remain valid afterwards.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
};
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_CODEGEN_EXPANDREDUCTIONS_H
|
@ -405,6 +405,10 @@ namespace llvm {
|
||||
/// printing assembly.
|
||||
ModulePass *createMachineOutlinerPass();
|
||||
|
||||
/// This pass expands the experimental reduction intrinsics into sequences of
|
||||
/// shuffles.
|
||||
FunctionPass *createExpandReductionsPass();
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
/// Target machine pass initializer for passes with dependencies. Use with
|
||||
|
@ -130,6 +130,7 @@ void initializeEfficiencySanitizerPass(PassRegistry&);
|
||||
void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
|
||||
void initializeExpandISelPseudosPass(PassRegistry&);
|
||||
void initializeExpandPostRAPass(PassRegistry&);
|
||||
void initializeExpandReductionsPass(PassRegistry&);
|
||||
void initializeExternalAAWrapperPassPass(PassRegistry&);
|
||||
void initializeFEntryInserterPass(PassRegistry&);
|
||||
void initializeFinalizeMachineBundlesPass(PassRegistry&);
|
||||
|
@ -491,6 +491,12 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
|
||||
LoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE = nullptr);
|
||||
|
||||
/// Generates a vector reduction using shufflevectors to reduce the value.
|
||||
Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
||||
RecurrenceDescriptor::MinMaxRecurrenceKind
|
||||
MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
|
||||
ArrayRef<Value *> RedOps = ArrayRef<Value *>());
|
||||
|
||||
/// Create a target reduction of the given vector. The reduction operation
|
||||
/// is described by the \p Opcode parameter. min/max reductions require
|
||||
/// additional information supplied in \p Flags.
|
||||
|
@ -505,6 +505,9 @@ bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
|
||||
return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
|
||||
}
|
||||
|
||||
// Public entry point of the hook: delegates to the implementation held in
// TTIImpl and returns its answer.
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
|
||||
return TTIImpl->shouldExpandReduction(II);
|
||||
}
|
||||
|
||||
TargetTransformInfo::Concept::~Concept() {}
|
||||
|
||||
|
@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen
|
||||
ExecutionDepsFix.cpp
|
||||
ExpandISelPseudos.cpp
|
||||
ExpandPostRAPseudos.cpp
|
||||
ExpandReductions.cpp
|
||||
FaultMaps.cpp
|
||||
FEntryInserter.cpp
|
||||
FuncletLayout.cpp
|
||||
|
167
lib/CodeGen/ExpandReductions.cpp
Normal file
167
lib/CodeGen/ExpandReductions.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass implements IR expansion for reduction intrinsics, allowing targets
|
||||
// to enable the experimental intrinsics until just before codegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/CodeGen/ExpandReductions.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Maps a reduction intrinsic ID to the instruction opcode handed to
/// getShuffleReduction(). Arithmetic and bitwise reductions map to their
/// binary operator; integer min/max map to ICmp and floating-point min/max
/// to FCmp. Any other ID is a programming error.
unsigned getOpcode(Intrinsic::ID ID) {
  unsigned Opc;
  switch (ID) {
  // Compare-based reductions.
  case Intrinsic::experimental_vector_reduce_smax:
  case Intrinsic::experimental_vector_reduce_smin:
  case Intrinsic::experimental_vector_reduce_umax:
  case Intrinsic::experimental_vector_reduce_umin:
    Opc = Instruction::ICmp;
    break;
  case Intrinsic::experimental_vector_reduce_fmax:
  case Intrinsic::experimental_vector_reduce_fmin:
    Opc = Instruction::FCmp;
    break;
  // Bitwise reductions.
  case Intrinsic::experimental_vector_reduce_and:
    Opc = Instruction::And;
    break;
  case Intrinsic::experimental_vector_reduce_or:
    Opc = Instruction::Or;
    break;
  case Intrinsic::experimental_vector_reduce_xor:
    Opc = Instruction::Xor;
    break;
  // Arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_add:
    Opc = Instruction::Add;
    break;
  case Intrinsic::experimental_vector_reduce_mul:
    Opc = Instruction::Mul;
    break;
  case Intrinsic::experimental_vector_reduce_fadd:
    Opc = Instruction::FAdd;
    break;
  case Intrinsic::experimental_vector_reduce_fmul:
    Opc = Instruction::FMul;
    break;
  default:
    llvm_unreachable("Unexpected ID");
  }
  return Opc;
}
|
||||
|
||||
/// Returns the min/max recurrence kind matching a min/max reduction
/// intrinsic, or MRK_Invalid when \p ID is not one of the six min/max
/// reductions.
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  auto Kind = RecurrenceDescriptor::MRK_Invalid;
  switch (ID) {
  case Intrinsic::experimental_vector_reduce_smax:
    Kind = RecurrenceDescriptor::MRK_SIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_smin:
    Kind = RecurrenceDescriptor::MRK_SIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_umax:
    Kind = RecurrenceDescriptor::MRK_UIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_umin:
    Kind = RecurrenceDescriptor::MRK_UIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_fmax:
    Kind = RecurrenceDescriptor::MRK_FloatMax;
    break;
  case Intrinsic::experimental_vector_reduce_fmin:
    Kind = RecurrenceDescriptor::MRK_FloatMin;
    break;
  default:
    // Not a min/max reduction: keep MRK_Invalid.
    break;
  }
  return Kind;
}
|
||||
|
||||
/// Replaces the experimental reduction intrinsic calls in \p F with the
/// shuffle sequence produced by getShuffleReduction().
///
/// A call is left untouched when
///  - it is an fadd/fmul reduction without unsafe-algebra fast-math flags
///    (that is an ordered reduction, which the shuffle sequence cannot model),
///  - its vector operand does not have a power-of-two number of elements
///    (the log2 shuffle scheme relies on that), or
///  - \p TTI->shouldExpandReduction() answers false for it.
///
/// \param F   Function to rewrite in place.
/// \param TTI Target hook provider; must not be null.
/// \returns true if at least one call was replaced.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  bool Changed = false;

  // The loop below erases the calls it expands, so gather all candidates
  // before mutating the function.
  SmallVector<IntrinsicInst *, 4> Worklist;
  for (Instruction &I : instructions(F))
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      Worklist.push_back(II);

  for (auto *II : Worklist) {
    Value *Vec = nullptr;
    auto ID = II->getIntrinsicID();
    auto MRK = RecurrenceDescriptor::MRK_Invalid;
    switch (ID) {
    case Intrinsic::experimental_vector_reduce_fadd:
    case Intrinsic::experimental_vector_reduce_fmul:
      // FMFs must be attached to the call, otherwise it's an ordered reduction
      // and it can't be handled by generating this shuffle sequence.
      // TODO: Implement scalarization of ordered reductions here for targets
      // without native support.
      if (!II->getFastMathFlags().unsafeAlgebra())
        continue;
      // Operand 0 is the scalar accumulator and operand 1 the vector; only the
      // vector feeds the shuffle sequence.
      Vec = II->getArgOperand(1);
      break;
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      // NOTE(review): fmax/fmin are expanded even when the call carries no
      // fast-math flags, and the emitted compare is 'fcmp fast' (see
      // test/CodeGen/Generic/expand-experimental-reductions.ll) - confirm
      // this is intended.
      Vec = II->getArgOperand(0);
      MRK = getMRK(ID);
      break;
    default:
      // Not a reduction intrinsic.
      continue;
    }

    // getShuffleReduction() halves the live lanes on every step, which only
    // works for a power-of-two element count. Leave other vector widths for
    // the target to handle instead of emitting a wrong sequence.
    unsigned NumElts = Vec->getType()->getVectorNumElements();
    if ((NumElts & (NumElts - 1)) != 0)
      continue;

    if (!TTI->shouldExpandReduction(II))
      continue;

    // Emit the replacement right before the call, then drop the call.
    IRBuilder<> Builder(II);
    Value *Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
    II->replaceAllUsesWith(Rdx);
    II->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
|
||||
|
||||
class ExpandReductions : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
ExpandReductions() : FunctionPass(ID) {
|
||||
initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F) override {
|
||||
const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||
return expandReductions(F, TTI);
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
char ExpandReductions::ID;
|
||||
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
|
||||
"Expand reduction intrinsics", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
|
||||
"Expand reduction intrinsics", false, false)
|
||||
|
||||
// Factory for the ExpandReductions pass: allocates a fresh instance with
// `new` and returns it as a FunctionPass pointer.
FunctionPass *llvm::createExpandReductionsPass() {
|
||||
return new ExpandReductions();
|
||||
}
|
||||
|
||||
// Entry point taking a FunctionAnalysisManager: obtains the
// TargetTransformInfo result for \p F from \p AM and runs the shared
// expansion. When something was rewritten only the CFG analyses are reported
// as preserved; otherwise everything is.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  if (expandReductions(F, &TTI)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}
|
@ -487,6 +487,9 @@ void TargetPassConfig::addIRPasses() {
|
||||
|
||||
// Insert calls to mcount-like functions.
|
||||
addPass(createCountingFunctionInserterPass());
|
||||
|
||||
// Expand reduction intrinsics into shuffle sequences if the target wants to.
|
||||
addPass(createExpandReductionsPass());
|
||||
}
|
||||
|
||||
/// Turn exception handling constructs into something the code generators can
|
||||
|
@ -137,6 +137,10 @@ public:
|
||||
unsigned getMinPrefetchStride();
|
||||
|
||||
unsigned getMaxPrefetchIterationsAhead();
|
||||
|
||||
/// Unconditionally answers false: this implementation never asks for a
/// reduction intrinsic to be expanded. \p II is not inspected.
bool shouldExpandReduction(const IntrinsicInst *II) const {
|
||||
return false;
|
||||
}
|
||||
/// @}
|
||||
};
|
||||
|
||||
|
@ -1125,11 +1125,10 @@ static Value *addFastMathFlag(Value *V) {
|
||||
}
|
||||
|
||||
// Helper to generate a log2 shuffle reduction.
|
||||
static Value *
|
||||
getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
||||
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
|
||||
RecurrenceDescriptor::MRK_Invalid,
|
||||
ArrayRef<Value *> RedOps = ArrayRef<Value *>()) {
|
||||
Value *
|
||||
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
||||
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
|
||||
ArrayRef<Value *> RedOps) {
|
||||
unsigned VF = Src->getType()->getVectorNumElements();
|
||||
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
|
||||
// and vector ops, reducing the set of values being computed by half each
|
||||
|
210
test/CodeGen/Generic/expand-experimental-reductions.ll
Normal file
210
test/CodeGen/Generic/expand-experimental-reductions.ll
Normal file
@ -0,0 +1,210 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -expand-reductions -S | FileCheck %s
|
||||
; Tests without a target which should expand all reductions
|
||||
declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
|
||||
|
||||
declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
|
||||
declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
|
||||
|
||||
declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
|
||||
|
||||
declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
|
||||
declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
|
||||
|
||||
|
||||
; Integer add reduction of <2 x i64>: the checks expect the call to be replaced
; by one shufflevector, a vector add and an extractelement of lane 0.
define i64 @add_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @add_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Integer mul reduction of <2 x i64>: the checks expect the call to be replaced
; by one shufflevector, a vector mul and an extractelement of lane 0.
define i64 @mul_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @mul_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Bitwise and reduction of <2 x i64>: the checks expect the call to be replaced
; by one shufflevector, a vector and and an extractelement of lane 0.
define i64 @and_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @and_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Bitwise or reduction of <2 x i64>: the checks expect the call to be replaced
; by one shufflevector, a vector or and an extractelement of lane 0.
define i64 @or_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @or_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Bitwise xor reduction of <2 x i64>: the checks expect the call to be replaced
; by one shufflevector, a vector xor and an extractelement of lane 0.
define i64 @xor_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @xor_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; 'fast' fadd reduction of <4 x float>: the checks expect two shuffle + 'fadd
; fast' steps followed by an extractelement of lane 0. The undef accumulator
; operand does not appear in the expected output.
define float @fadd_f32(<4 x float> %vec) {
|
||||
; CHECK-LABEL: @fadd_f32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; fadd reduction without fast-math flags: the checks expect the call to be
; left in place (no shuffle sequence).
; NOTE(review): the checked callee name (...fadd.f32.f32.v4f32) differs from
; the declared one (...fadd.f32.v4f32); presumably the name is re-mangled when
; the module is read - confirm.
define float @fadd_f32_strict(<4 x float> %vec) {
|
||||
; CHECK-LABEL: @fadd_f32_strict(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> [[VEC:%.*]])
|
||||
; CHECK-NEXT: ret float [[R]]
|
||||
;
|
||||
entry:
|
||||
%r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; 'fast' fmul reduction of <4 x float>: the checks expect two shuffle + 'fmul
; fast' steps followed by an extractelement of lane 0. The undef accumulator
; operand does not appear in the expected output.
define float @fmul_f32(<4 x float> %vec) {
|
||||
; CHECK-LABEL: @fmul_f32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Signed max reduction of <2 x i64>: the checks expect a shufflevector, an
; 'icmp sgt' + select pair and an extractelement of lane 0.
define i64 @smax_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @smax_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Signed min reduction of <2 x i64>: the checks expect a shufflevector, an
; 'icmp slt' + select pair and an extractelement of lane 0.
define i64 @smin_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @smin_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Unsigned max reduction of <2 x i64>: the checks expect a shufflevector, an
; 'icmp ugt' + select pair and an extractelement of lane 0.
define i64 @umax_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @umax_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Unsigned min reduction of <2 x i64>: the checks expect a shufflevector, an
; 'icmp ult' + select pair and an extractelement of lane 0.
define i64 @umin_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @umin_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; Floating-point max reduction of <2 x double>: the checks expect a
; shufflevector, an 'fcmp fast ogt' + select pair and an extractelement of
; lane 0. Note that the call itself carries no fast-math flags while the
; expected compare is 'fast'.
define double @fmax_f64(<2 x double> %vec) {
|
||||
; CHECK-LABEL: @fmax_f64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; Floating-point min reduction of <2 x double>: the checks expect a
; shufflevector, an 'fcmp fast olt' + select pair and an extractelement of
; lane 0. Note that the call itself carries no fast-math flags while the
; expected compare is 'fast'.
define double @fmin_f64(<2 x double> %vec) {
|
||||
; CHECK-LABEL: @fmin_f64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
|
||||
ret double %r
|
||||
}
|
@ -23,6 +23,7 @@
|
||||
; CHECK-NEXT: Shadow Stack GC Lowering
|
||||
; CHECK-NEXT: Remove unreachable blocks from the CFG
|
||||
; CHECK-NEXT: Inserts calls to mcount-like functions
|
||||
; CHECK-NEXT: Expand reduction intrinsics
|
||||
; CHECK-NEXT: Rewrite Symbols
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
|
@ -301,6 +301,7 @@ int main(int argc, char **argv) {
|
||||
initializeConstantHoistingLegacyPassPass(*Registry);
|
||||
initializeScalarOpts(*Registry);
|
||||
initializeVectorization(*Registry);
|
||||
initializeExpandReductionsPass(*Registry);
|
||||
|
||||
// Register the target printer for --version.
|
||||
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
|
||||
|
@ -397,6 +397,7 @@ int main(int argc, char **argv) {
|
||||
initializeInterleavedAccessPass(Registry);
|
||||
initializeCountingFunctionInserterPass(Registry);
|
||||
initializeUnreachableBlockElimLegacyPassPass(Registry);
|
||||
initializeExpandReductionsPass(Registry);
|
||||
|
||||
#ifdef LINK_POLLY_INTO_TOOLS
|
||||
polly::initializePollyPasses(Registry);
|
||||
|
Loading…
Reference in New Issue
Block a user