1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

Revert "LoopVectorizer: Only allow vectorization of intrinsics."

Revert 191122 - with extra checks we are allowed to vectorize math library
function calls.

Standard library identifiers are reserved names, so functions with external
linkage must not override them. However, functions with internal linkage can.

Therefore, we can vectorize calls to math library functions with a check for
external linkage and matching signature. This matches what we do during
SelectionDAG building.

llvm-svn: 191206
This commit is contained in:
Arnold Schwaighofer 2013-09-23 14:54:39 +00:00
parent 48e843e09c
commit b1cea2cfcc
2 changed files with 98 additions and 48 deletions

View File

@ -1809,6 +1809,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
}
}
/// Check that call \p I has the shape of a unary floating-point routine.
///
/// The call is accepted only when it has exactly one argument operand, that
/// operand has a floating-point type, the call's result type equals the
/// operand type, and the call reports onlyReadsMemory().
///
/// \param I                the call instruction to inspect.
/// \param ValidIntrinsicID the ID to hand back when every check passes.
/// \returns \p ValidIntrinsicID on success, Intrinsic::not_intrinsic otherwise.
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
                                              Intrinsic::ID ValidIntrinsicID) {
  // The operand count is tested first so operand 0 is only touched when it
  // exists.
  if (I.getNumArgOperands() != 1)
    return Intrinsic::not_intrinsic;

  // The single operand must be a floating-point value.
  if (!I.getArgOperand(0)->getType()->isFloatingPointTy())
    return Intrinsic::not_intrinsic;

  // The result type must be identical to the operand type.
  if (I.getArgOperand(0)->getType() != I.getType())
    return Intrinsic::not_intrinsic;

  // Finally, the call must only read memory.
  return I.onlyReadsMemory() ? ValidIntrinsicID : Intrinsic::not_intrinsic;
}
/// Check that call \p I has the shape of a binary floating-point routine.
///
/// The call is accepted only when it has exactly two argument operands, both
/// operands have a floating-point type, the call's result type equals the type
/// of each operand, and the call reports onlyReadsMemory().
///
/// \param I                the call instruction to inspect.
/// \param ValidIntrinsicID the ID to hand back when every check passes.
/// \returns \p ValidIntrinsicID on success, Intrinsic::not_intrinsic otherwise.
static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
                                               Intrinsic::ID ValidIntrinsicID) {
  // The operand count is tested first so operands 0 and 1 are only touched
  // when they exist.
  if (I.getNumArgOperands() != 2)
    return Intrinsic::not_intrinsic;

  // Both operands must be floating-point values.
  if (!I.getArgOperand(0)->getType()->isFloatingPointTy())
    return Intrinsic::not_intrinsic;
  if (!I.getArgOperand(1)->getType()->isFloatingPointTy())
    return Intrinsic::not_intrinsic;

  // The result type must be identical to the type of each operand.
  if (I.getArgOperand(0)->getType() != I.getType())
    return Intrinsic::not_intrinsic;
  if (I.getArgOperand(1)->getType() != I.getType())
    return Intrinsic::not_intrinsic;

  // Finally, the call must only read memory.
  return I.onlyReadsMemory() ? ValidIntrinsicID : Intrinsic::not_intrinsic;
}
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
@ -1847,8 +1872,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
LibFunc::Func Func;
Function *F = CI->getCalledFunction();
// We're going to make assumptions on the semantics of the functions, check
// that the target knows that it's available in this environment.
if (!F || !TLI->getLibFunc(F->getName(), Func))
// that the target knows that it's available in this environment and it does
// not have local linkage.
if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
return Intrinsic::not_intrinsic;
// Otherwise check if we have a call to a function that can be turned into a
@ -1859,67 +1885,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
return Intrinsic::sin;
return checkUnaryFloatSignature(*CI, Intrinsic::sin);
case LibFunc::cos:
case LibFunc::cosf:
case LibFunc::cosl:
return Intrinsic::cos;
return checkUnaryFloatSignature(*CI, Intrinsic::cos);
case LibFunc::exp:
case LibFunc::expf:
case LibFunc::expl:
return Intrinsic::exp;
return checkUnaryFloatSignature(*CI, Intrinsic::exp);
case LibFunc::exp2:
case LibFunc::exp2f:
case LibFunc::exp2l:
return Intrinsic::exp2;
return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
case LibFunc::log:
case LibFunc::logf:
case LibFunc::logl:
return Intrinsic::log;
return checkUnaryFloatSignature(*CI, Intrinsic::log);
case LibFunc::log10:
case LibFunc::log10f:
case LibFunc::log10l:
return Intrinsic::log10;
return checkUnaryFloatSignature(*CI, Intrinsic::log10);
case LibFunc::log2:
case LibFunc::log2f:
case LibFunc::log2l:
return Intrinsic::log2;
return checkUnaryFloatSignature(*CI, Intrinsic::log2);
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
return Intrinsic::fabs;
return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
case LibFunc::copysign:
case LibFunc::copysignf:
case LibFunc::copysignl:
return Intrinsic::copysign;
return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
return Intrinsic::floor;
return checkUnaryFloatSignature(*CI, Intrinsic::floor);
case LibFunc::ceil:
case LibFunc::ceilf:
case LibFunc::ceill:
return Intrinsic::ceil;
return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
return Intrinsic::trunc;
return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
case LibFunc::rint:
case LibFunc::rintf:
case LibFunc::rintl:
return Intrinsic::rint;
return checkUnaryFloatSignature(*CI, Intrinsic::rint);
case LibFunc::nearbyint:
case LibFunc::nearbyintf:
case LibFunc::nearbyintl:
return Intrinsic::nearbyint;
return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
case LibFunc::round:
case LibFunc::roundf:
case LibFunc::roundl:
return Intrinsic::round;
return checkUnaryFloatSignature(*CI, Intrinsic::round);
case LibFunc::pow:
case LibFunc::powf:
case LibFunc::powl:
return Intrinsic::pow;
return checkBinaryFloatSignature(*CI, Intrinsic::pow);
}
return Intrinsic::not_intrinsic;
@ -2925,18 +2951,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// We still don't handle functions. However, we can ignore dbg intrinsic
// calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
if (CI) {
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
DEBUG(dbgs() << "LV: Found a call site.\n");
if (!isa<IntrinsicInst>(it)) {
DEBUG(dbgs() << "LV: We only vectorize intrinsics.\n");
return false;
}
if (!getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
DEBUG(dbgs() << "LV: Found an unknown intrinsic.\n");
return false;
}
return false;
}
// Check that the instruction return type is vectorizable.

View File

@ -1018,7 +1018,7 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
%0 = load float* %arrayidx, align 4
%call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
%call = tail call float @fabsf(float %0) nounwind readnone
store float %call, float* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@ -1029,31 +1029,64 @@ for.end: ; preds = %for.body
ret void
}
declare float @fabsf(float) nounwind readnone
declare double @llvm.pow.f64(double, double) nounwind readnone
;CHECK: @not_intrin
;CHECK: @round
;CHECK-NOT: @round
;CHECK: ret
define void @not_intrin(i32* nocapture %A) nounwind ssp uwtable {
br label %1
; <label>:1 ; preds = %1, %0
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
%2 = getelementptr inbounds i32* %A, i64 %indvars.iv
%3 = load i32* %2, align 4
%4 = add nsw i32 %3, 3
store i32 %4, i32* %2, align 4
%5 = trunc i64 %indvars.iv to i32
tail call void @round(i32 %5) nounwind
; Make sure we don't replace calls to functions with standard library function
; signatures but defined with internal linkage.
define internal float @roundf(float %x) nounwind readnone {
ret float 0.00000000
}
; CHECK-LABEL: internal_round
; CHECK-NOT: load <4 x float>
define void @internal_round(float* nocapture %x) nounwind {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
%0 = load float* %arrayidx, align 4
%call = tail call float @roundf(float %0) nounwind readnone
store float %call, float* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 256
br i1 %exitcond, label %6, label %1
%exitcond = icmp eq i32 %lftr.wideiv, 1024
br i1 %exitcond, label %for.end, label %for.body
; <label>:6 ; preds = %1
for.end: ; preds = %for.body
ret void
}
; Make sure we don't replace calls to functions with standard library names but
; different signatures.
declare void @round(double %f)
; CHECK-LABEL: wrong_signature
; CHECK-NOT: load <4 x double>
define void @wrong_signature(double* nocapture %x) nounwind {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv
%0 = load double* %arrayidx, align 4
store double %0, double* %arrayidx, align 4
tail call void @round(double %0) nounwind readnone
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
declare void @round(i32)