Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 20:51:52 +01:00)
[VectorCombine] add/use pass-level IRBuilder
This saves creating/destroying a builder every time we perform some transform. The tests show instruction ordering diffs resulting from always inserting at the root instruction now, but those should be benign.
parent f7e3d3a72e
commit 4aa4c0ae7d
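
The shape of the change is easiest to see in isolation: the builder becomes a pass member constructed once from the function's LLVMContext, and run() re-aims it at each candidate instruction before invoking the transforms. Below is a minimal sketch of that pattern, not the pass itself; ExampleCombine and the commented-out tryTransform are placeholder names, not VectorCombine's real interface.

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

namespace {
class ExampleCombine {
  Function &F;
  IRBuilder<> Builder; // pass-level: constructed once, shared by every transform

public:
  ExampleCombine(Function &F) : F(F), Builder(F.getContext()) {}

  bool run() {
    bool MadeChange = false;
    for (BasicBlock &BB : F)
      for (Instruction &I : BB) {
        // Re-aim the shared builder so new IR is inserted before the root
        // instruction I, rather than next to whichever operand a helper visits.
        Builder.SetInsertPoint(&I);
        // MadeChange |= tryTransform(I); // placeholder for the fold* helpers
      }
    return MadeChange;
  }
};
} // namespace

Because every helper shares this one builder, new instructions are now materialized at the root instruction's position, which is what produces the benign ordering diffs in the updated tests below.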
@@ -51,12 +51,13 @@ class VectorCombine {
 public:
   VectorCombine(Function &F, const TargetTransformInfo &TTI,
                 const DominatorTree &DT)
-      : F(F), TTI(TTI), DT(DT) {}
+      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}
 
   bool run();
 
 private:
   Function &F;
+  IRBuilder<> Builder;
   const TargetTransformInfo &TTI;
   const DominatorTree &DT;
 
@@ -64,6 +65,12 @@ private:
                              unsigned Opcode,
                              ExtractElementInst *&ConvertToShuffle,
                              unsigned PreferredExtractIndex);
+  ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
+                                       unsigned NewIndex);
+  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+                     Instruction &I);
+  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+                       Instruction &I);
   bool foldExtractExtract(Instruction &I);
   bool foldBitcastShuf(Instruction &I);
   bool scalarizeBinopOrCmp(Instruction &I);
@@ -182,12 +189,13 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
 /// the source vector (shift the scalar element) to a NewIndex for extraction.
 /// Return null if the input can be constant folded, so that we are not creating
 /// unnecessary instructions.
-static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
-                                            unsigned NewIndex) {
+ExtractElementInst *VectorCombine::translateExtract(ExtractElementInst *ExtElt,
+                                                    unsigned NewIndex) {
   // If the extract can be constant-folded, this code is unsimplified. Defer
   // to other passes to handle that.
   Value *X = ExtElt->getVectorOperand();
   Value *C = ExtElt->getIndexOperand();
+  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
   if (isa<Constant>(X))
     return nullptr;
 
@@ -196,11 +204,9 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
   // ShufMask = { 2, undef, undef, undef }
   auto *VecTy = cast<FixedVectorType>(X->getType());
   SmallVector<int, 32> Mask(VecTy->getNumElements(), -1);
-  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
   Mask[NewIndex] = cast<ConstantInt>(C)->getZExtValue();
 
   // extelt X, C --> extelt (shuffle X), NewIndex
-  IRBuilder<> Builder(ExtElt);
   Value *Shuf =
       Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask, "shift");
   return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
@@ -209,8 +215,8 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
 /// Try to reduce extract element costs by converting scalar compares to vector
 /// compares followed by extract.
 /// cmp (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                          Instruction &I) {
+void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
+                                  ExtractElementInst *Ext1, Instruction &I) {
   assert(isa<CmpInst>(&I) && "Expected a compare");
   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
@@ -218,7 +224,6 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 
   // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
   ++NumVecCmp;
-  IRBuilder<> Builder(&I);
   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
@@ -230,8 +235,8 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 /// Try to reduce extract element costs by converting scalar binops to vector
 /// binops followed by extract.
 /// bo (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                            Instruction &I) {
+void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
+                                    ExtractElementInst *Ext1, Instruction &I) {
   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
@@ -239,7 +244,6 @@ static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 
   // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
   ++NumVecBO;
-  IRBuilder<> Builder(&I);
   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecBO =
       Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
@@ -353,7 +357,6 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
   }
   // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
   ++NumShufOfBitcast;
-  IRBuilder<> Builder(&I);
   Value *CastV = Builder.CreateBitCast(V, DestTy);
   Value *Shuf =
       Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
@@ -454,7 +457,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
   ++NumScalarBO;
 
   // For constant cases, extract the scalar element, this should constant fold.
-  IRBuilder<> Builder(&I);
   if (IsConst0)
     V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
   if (IsConst1)
@@ -498,6 +500,7 @@ bool VectorCombine::run() {
     for (Instruction &I : BB) {
       if (isa<DbgInfoIntrinsic>(I))
         continue;
+      Builder.SetInsertPoint(&I);
       MadeChange |= foldExtractExtract(I);
       MadeChange |= foldBitcastShuf(I);
       MadeChange |= scalarizeBinopOrCmp(I);
@@ -41,11 +41,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
 
 define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y:%.*]]
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SHIFT2]]
 ; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[X2Y210]]
@@ -454,12 +454,12 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @PR34724(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
 ; CHECK-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
 ; CHECK-NEXT:    [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
 ; CHECK-NEXT:    [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1