mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[PowerPC] [Constant Hoisting] Enable constant hoisting on PPC
Implements the various TTI functions to enable constant hoisting on PPC. The only significant test-suite change is this: MultiSource/Benchmarks/VersaBench/bmm/bmm - 20% speedup (which essentially reverses the slowdown from r206120). llvm-svn: 206141
This commit is contained in:
parent
2c8bf1111d
commit
c4a623f8d4
@ -18,11 +18,15 @@
|
||||
#include "PPC.h"
|
||||
#include "PPCTargetMachine.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/CostTable.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Hidden command-line switch (off by default). When set, the getIntImmCost
// overloads in this file defer to the generic TargetTransformInfo versions.
static cl::opt<bool>
DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::Hidden,
                     cl::init(false),
                     cl::desc("disable constant hoisting on PPC"));
|
||||
|
||||
// Declare the pass initialization routine locally as target-specific passes
|
||||
// don't have a target-wide initialization entry point, and so we rely on the
|
||||
// pass constructor initialization.
|
||||
@ -67,6 +71,13 @@ public:
|
||||
|
||||
/// \name Scalar TTI Implementations
|
||||
/// @{
|
||||
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
|
||||
|
||||
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
|
||||
Type *Ty) const override;
|
||||
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
|
||||
Type *Ty) const override;
|
||||
|
||||
virtual PopcntSupportKind
|
||||
getPopcntSupport(unsigned TyWidth) const override;
|
||||
virtual void getUnrollingPreferences(
|
||||
@ -123,6 +134,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
|
||||
return PSK_Software;
|
||||
}
|
||||
|
||||
unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
|
||||
if (DisablePPCConstHoist)
|
||||
return TargetTransformInfo::getIntImmCost(Imm, Ty);
|
||||
|
||||
assert(Ty->isIntegerTy());
|
||||
|
||||
unsigned BitSize = Ty->getPrimitiveSizeInBits();
|
||||
if (BitSize == 0)
|
||||
return ~0U;
|
||||
|
||||
if (Imm == 0)
|
||||
return TCC_Free;
|
||||
|
||||
if (Imm.getBitWidth() <= 64) {
|
||||
if (isInt<16>(Imm.getSExtValue()))
|
||||
return TCC_Basic;
|
||||
|
||||
if (isInt<32>(Imm.getSExtValue())) {
|
||||
// A constant that can be materialized using lis.
|
||||
if ((Imm.getZExtValue() & 0xFFFF) == 0)
|
||||
return TCC_Basic;
|
||||
|
||||
return 2 * TCC_Basic;
|
||||
}
|
||||
}
|
||||
|
||||
return 4 * TCC_Basic;
|
||||
}
|
||||
|
||||
/// Estimate the cost of using the integer constant \p Imm of type \p Ty as
/// argument number \p Idx of a call to the intrinsic \p IID.
///
/// Only the add/sub-with-overflow intrinsics are ever charged: a constant in
/// argument position 1 that is a signed 16-bit value is free, any other
/// constant on those intrinsics is priced by the two-argument getIntImmCost
/// overload. Constants on all other intrinsics are reported as TCC_Free.
unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty) const {
  // With the hidden switch set, defer to the target-independent estimate.
  if (DisablePPCConstHoist)
    return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);

  assert(Ty->isIntegerTy());

  if (Ty->getPrimitiveSizeInBits() == 0)
    return ~0U;

  switch (IID) {
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow: {
    // The width check must come first: getSExtValue() is only valid for
    // constants of at most 64 bits.
    const bool IsSmallSecondArg = Idx == 1 && Imm.getBitWidth() <= 64 &&
                                  isInt<16>(Imm.getSExtValue());
    if (IsSmallSecondArg)
      return TCC_Free;
    return PPCTTI::getIntImmCost(Imm, Ty);
  }
  default:
    return TCC_Free;
  }
}
|
||||
|
||||
unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
|
||||
Type *Ty) const {
|
||||
if (DisablePPCConstHoist)
|
||||
return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
|
||||
|
||||
assert(Ty->isIntegerTy());
|
||||
|
||||
unsigned BitSize = Ty->getPrimitiveSizeInBits();
|
||||
if (BitSize == 0)
|
||||
return ~0U;
|
||||
|
||||
unsigned ImmIdx = ~0U;
|
||||
bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
|
||||
ZeroFree = false;
|
||||
switch (Opcode) {
|
||||
default: return TCC_Free;
|
||||
case Instruction::GetElementPtr:
|
||||
// Always hoist the base address of a GetElementPtr. This prevents the
|
||||
// creation of new constants for every base constant that gets constant
|
||||
// folded with the offset.
|
||||
if (Idx == 0)
|
||||
return 2 * TCC_Basic;
|
||||
return TCC_Free;
|
||||
case Instruction::And:
|
||||
RunFree = true; // (for the rotate-and-mask instructions)
|
||||
// Fallthrough...
|
||||
case Instruction::Add:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor:
|
||||
ShiftedFree = true;
|
||||
// Fallthrough...
|
||||
case Instruction::Sub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
ImmIdx = 1;
|
||||
break;
|
||||
case Instruction::ICmp:
|
||||
UnsignedFree = true;
|
||||
ImmIdx = 1;
|
||||
// Fallthrough... (zero comparisons can use record-form instructions)
|
||||
case Instruction::Select:
|
||||
ZeroFree = true;
|
||||
break;
|
||||
case Instruction::PHI:
|
||||
case Instruction::Call:
|
||||
case Instruction::Ret:
|
||||
case Instruction::Load:
|
||||
case Instruction::Store:
|
||||
break;
|
||||
}
|
||||
|
||||
if (ZeroFree && Imm == 0)
|
||||
return TCC_Free;
|
||||
|
||||
if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
|
||||
if (isInt<16>(Imm.getSExtValue()))
|
||||
return TCC_Free;
|
||||
|
||||
if (RunFree) {
|
||||
if (Imm.getBitWidth() <= 32 &&
|
||||
(isShiftedMask_32(Imm.getZExtValue()) ||
|
||||
isShiftedMask_32(~Imm.getZExtValue())))
|
||||
return TCC_Free;
|
||||
|
||||
|
||||
if (ST->isPPC64() &&
|
||||
(isShiftedMask_64(Imm.getZExtValue()) ||
|
||||
isShiftedMask_64(~Imm.getZExtValue())))
|
||||
return TCC_Free;
|
||||
}
|
||||
|
||||
if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
|
||||
return TCC_Free;
|
||||
|
||||
if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
|
||||
return TCC_Free;
|
||||
}
|
||||
|
||||
return PPCTTI::getIntImmCost(Imm, Ty);
|
||||
}
|
||||
|
||||
void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
|
||||
if (ST->getDarwinDirective() == PPC::DIR_A2) {
|
||||
// The A2 is in-order with a deep pipeline, and concatenation unrolling
|
||||
|
23
test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
Normal file
23
test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: opt -S -consthoist < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
%T = type { i32, i32, i32, i32 }
|
||||
|
||||
; Test if even cheap base addresses are hoisted.
|
||||
define i32 @test1() nounwind {
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: %const = bitcast i32 12345678 to i32
|
||||
; CHECK: %1 = inttoptr i32 %const to %T*
|
||||
; CHECK: %addr1 = getelementptr %T* %1, i32 0, i32 1
|
||||
%addr1 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
|
||||
%tmp1 = load i32* %addr1
|
||||
%addr2 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
|
||||
%tmp2 = load i32* %addr2
|
||||
%addr3 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
|
||||
%tmp3 = load i32* %addr3
|
||||
%tmp4 = add i32 %tmp1, %tmp2
|
||||
%tmp5 = add i32 %tmp3, %tmp4
|
||||
ret i32 %tmp5
|
||||
}
|
||||
|
4
test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
Normal file
4
test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
targets = set(config.root.targets_to_build.split())
|
||||
if not 'PowerPC' in targets:
|
||||
config.unsupported = True
|
||||
|
66
test/Transforms/ConstantHoisting/PowerPC/masks.ll
Normal file
66
test/Transforms/ConstantHoisting/PowerPC/masks.ll
Normal file
@ -0,0 +1,66 @@
|
||||
; RUN: opt -S -consthoist < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Here the masks are all contiguous, and should not be hoisted.
|
||||
define i32 @test1() nounwind {
|
||||
entry:
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK-NOT: bitcast i32 65535 to i32
|
||||
; CHECK: and i32 undef, 65535
|
||||
%conv121 = and i32 undef, 65535
|
||||
br i1 undef, label %if.then152, label %if.end167
|
||||
|
||||
if.then152:
|
||||
; CHECK: and i32 undef, 65535
|
||||
%conv153 = and i32 undef, 65535
|
||||
br i1 undef, label %if.end167, label %end2
|
||||
|
||||
if.end167:
|
||||
; CHECK: and i32 {{.*}}, 32768
|
||||
%shl161 = shl nuw nsw i32 %conv121, 15
|
||||
%0 = load i8* undef, align 1
|
||||
%conv169 = zext i8 %0 to i32
|
||||
%shl170 = shl nuw nsw i32 %conv169, 7
|
||||
%shl161.masked = and i32 %shl161, 32768
|
||||
%conv174 = or i32 %shl170, %shl161.masked
|
||||
%cmp178 = icmp ugt i32 %conv174, 32767
|
||||
br i1 %cmp178, label %end1, label %end2
|
||||
|
||||
end1:
|
||||
unreachable
|
||||
|
||||
end2:
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Here the masks are not contiguous, and should be hoisted.
|
||||
define i32 @test2() nounwind {
|
||||
entry:
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: bitcast i32 65531 to i32
|
||||
%conv121 = and i32 undef, 65531
|
||||
br i1 undef, label %if.then152, label %if.end167
|
||||
|
||||
if.then152:
|
||||
%conv153 = and i32 undef, 65531
|
||||
br i1 undef, label %if.end167, label %end2
|
||||
|
||||
if.end167:
|
||||
; CHECK: add i32 {{.*}}, -32758
|
||||
%shl161 = shl nuw nsw i32 %conv121, 15
|
||||
%0 = load i8* undef, align 1
|
||||
%conv169 = zext i8 %0 to i32
|
||||
%shl170 = shl nuw nsw i32 %conv169, 7
|
||||
%shl161.masked = and i32 %shl161, 32773
|
||||
%conv174 = or i32 %shl170, %shl161.masked
|
||||
%cmp178 = icmp ugt i32 %conv174, 32767
|
||||
br i1 %cmp178, label %end1, label %end2
|
||||
|
||||
end1:
|
||||
unreachable
|
||||
|
||||
end2:
|
||||
unreachable
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user