mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[LoopVectorize] use IR fast-math-flags exclusively (not FP function attributes)
I am trying to untangle the fast-math-flags propagation logic in the vectorizers (see a6f022127 for SLP). The loop vectorizer has a mix of checking FP function attributes, IR-level FMF, and just wrong assumptions. I am trying to avoid regressions while fixing this, and I think the IR-level logic is good enough for that, but it's hard to say for sure. This would be the 1st step in the clean-up. The existing test that I changed to include 'fast' actually shows a miscompile: the function only had the equivalent of nnan, but we created new instructions that had fast (all FMF set). This is similar to the example in https://llvm.org/PR35538 Differential Revision: https://reviews.llvm.org/D95452
This commit is contained in:
parent
8ef46f8e36
commit
2ae45edb62
@ -344,9 +344,6 @@ public:
|
||||
unsigned getNumStores() const { return LAI->getNumStores(); }
|
||||
unsigned getNumLoads() const { return LAI->getNumLoads(); }
|
||||
|
||||
// Returns true if the NoNaN attribute is set on the function.
|
||||
bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
|
||||
|
||||
/// Returns all assume calls in predicated blocks. They need to be dropped
|
||||
/// when flattening the CFG.
|
||||
const SmallPtrSetImpl<Instruction *> &getConditionalAssumes() const {
|
||||
@ -495,9 +492,6 @@ private:
|
||||
/// outside the loop.
|
||||
SmallPtrSet<Value *, 4> AllowedExit;
|
||||
|
||||
/// Can we assume the absence of NaNs.
|
||||
bool HasFunNoNaNAttr = false;
|
||||
|
||||
/// Vectorization requirements that will go through late-evaluation.
|
||||
LoopVectorizationRequirements *Requirements;
|
||||
|
||||
|
@ -605,11 +605,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
|
||||
bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
BasicBlock *Header = TheLoop->getHeader();
|
||||
|
||||
// Look for the attribute signaling the absence of NaNs.
|
||||
Function &F = *Header->getParent();
|
||||
HasFunNoNaNAttr =
|
||||
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
|
||||
|
||||
// For each block in the loop.
|
||||
for (BasicBlock *BB : TheLoop->blocks()) {
|
||||
// Scan the instructions in the block and look for hazards.
|
||||
@ -673,7 +668,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
InductionDescriptor ID;
|
||||
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
|
||||
if (ID.hasUnsafeAlgebra())
|
||||
Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
|
||||
continue;
|
||||
}
|
||||
|
@ -8908,7 +8908,7 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
|
||||
? RecipeBuilder.createBlockInMask(R->getParent(), Plan)
|
||||
: nullptr;
|
||||
VPReductionRecipe *RedRecipe = new VPReductionRecipe(
|
||||
&RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), TTI);
|
||||
&RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
|
||||
WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe);
|
||||
Plan->removeVPValueFor(R);
|
||||
Plan->addVPValue(R, RedRecipe);
|
||||
|
@ -1130,17 +1130,15 @@ public:
|
||||
class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue {
|
||||
/// The recurrence decriptor for the reduction in question.
|
||||
RecurrenceDescriptor *RdxDesc;
|
||||
/// Fast math flags to use for the resulting reduction operation.
|
||||
bool NoNaN;
|
||||
/// Pointer to the TTI, needed to create the target reduction
|
||||
const TargetTransformInfo *TTI;
|
||||
|
||||
public:
|
||||
VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
|
||||
VPValue *VecOp, VPValue *CondOp, bool NoNaN,
|
||||
VPValue *VecOp, VPValue *CondOp,
|
||||
const TargetTransformInfo *TTI)
|
||||
: VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}),
|
||||
VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN),
|
||||
VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R),
|
||||
TTI(TTI) {
|
||||
if (CondOp)
|
||||
addOperand(CondOp);
|
||||
|
@ -4,7 +4,7 @@
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; This test checks auto-vectorization with FP induction variable.
|
||||
; The FP operation is not "fast" and requires "fast-math" function attribute.
|
||||
; FMF is required on the IR instructions.
|
||||
|
||||
;void fp_iv_loop1(float * __restrict__ A, int N) {
|
||||
; float x = 1.0;
|
||||
@ -149,7 +149,7 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
|
||||
; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
|
||||
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
|
||||
; AUTO_VEC-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4
|
||||
; AUTO_VEC-NEXT: [[CONV1]] = fadd float [[X_06]], 5.000000e-01
|
||||
; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
|
||||
; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; AUTO_VEC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
||||
; AUTO_VEC-NEXT: br i1 [[TMP45]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
|
||||
@ -168,7 +168,7 @@ for.body: ; preds = %for.body.preheader,
|
||||
%x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
store float %x.06, float* %arrayidx, align 4
|
||||
%conv1 = fadd float %x.06, 5.000000e-01
|
||||
%conv1 = fadd fast float %x.06, 5.000000e-01
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
@ -181,7 +181,7 @@ for.end: ; preds = %for.end.loopexit, %
|
||||
ret void
|
||||
}
|
||||
|
||||
; The same as the previous, FP operation is not fast, different function attribute
|
||||
; The same as the previous, but FP operation has no FMF.
|
||||
; Vectorization should be rejected.
|
||||
;void fp_iv_loop2(float * __restrict__ A, int N) {
|
||||
; float x = 1.0;
|
||||
@ -191,7 +191,7 @@ for.end: ; preds = %for.end.loopexit, %
|
||||
; }
|
||||
;}
|
||||
|
||||
define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 {
|
||||
define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
|
||||
; AUTO_VEC-LABEL: @fp_iv_loop2(
|
||||
; AUTO_VEC-NEXT: entry:
|
||||
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
@ -538,6 +538,3 @@ for.end:
|
||||
%t1 = phi double [ %j, %for.body ]
|
||||
ret double %t1
|
||||
}
|
||||
|
||||
attributes #0 = { "no-nans-fp-math"="true" }
|
||||
attributes #1 = { "no-nans-fp-math"="false" }
|
||||
|
@ -643,7 +643,7 @@ TEST(VPRecipeTest, CastVPReductionRecipeToVPUser) {
|
||||
VPValue ChainOp;
|
||||
VPValue VecOp;
|
||||
VPValue CondOp;
|
||||
VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp, false,
|
||||
VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp,
|
||||
nullptr);
|
||||
EXPECT_TRUE(isa<VPUser>(&Recipe));
|
||||
VPRecipeBase *BaseR = &Recipe;
|
||||
|
Loading…
Reference in New Issue
Block a user