[X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive numbers

Differential Revision: http://reviews.llvm.org/D5938

llvm-svn: 222521
Alexey Volkov 2014-11-21 11:19:34 +00:00
parent cb671c0b2c
commit 235268b4ed
5 changed files with 51 additions and 12 deletions
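
In effect, the new idivq-to-divw bypass wraps each 64-bit divide in a cheap range check and uses a 16-bit divide when both operands are small. A minimal C++ sketch of the equivalent logic (function name and shape are illustrative, not part of this patch):

#include <cstdint>

// Illustrative sketch: the fast/slow-path structure the divide bypass emits.
int64_t sdiv64_bypass(int64_t a, int64_t b) {
  // Matches the "testq $-65536, (a|b)" guard: taken when both values are
  // positive and below 65536, i.e. no bits set above the low 16 bits.
  if (((uint64_t(a) | uint64_t(b)) & ~uint64_t(0xFFFF)) == 0)
    return uint16_t(a) / uint16_t(b);   // cheap 16-bit divw
  return a / b;                         // full-width divq
}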

lib/Target/X86/X86.td

@@ -167,9 +167,12 @@ def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
                                    "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                        "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                         "HasSlowDivide", "true",
-                                         "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                           "HasSlowDivide32", "true",
+                                           "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                           "HasSlowDivide64", "true",
+                                           "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                                 "PadShortFunctions", "true",
                                                 "Pad short functions">;
@@ -234,7 +237,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
@@ -244,6 +247,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
                                        FeatureSSE42, FeatureCMPXCHG16B,
                                        FeatureMOVBE, FeaturePOPCNT,
                                        FeaturePCLMUL, FeatureAES,
+                                       FeatureSlowDivide64,
                                        FeatureCallRegIndirect,
                                        FeaturePRFCHW,
                                        FeatureSlowLEA, FeatureSlowIncDec,

lib/Target/X86/X86ISelLowering.cpp

@@ -249,9 +249,10 @@ void X86TargetLowering::resetOperationActions() {
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32())
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }

lib/Target/X86/X86Subtarget.cpp

@@ -267,7 +267,8 @@ void X86Subtarget::initializeEnvironment() {
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;

lib/Target/X86/X86Subtarget.h

@@ -171,9 +171,13 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;
 
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -373,7 +377,8 @@ public:
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }

test/CodeGen/X86/slow-div.ll

@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
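
The constants in the CHECK lines are the sign-extended complements of the narrow-type masks: operands take the fast path exactly when their OR has no bits above the narrow width. A standalone check of that arithmetic (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // testl $-256 tests bits above the low 8; testq $-65536, above the low 16.
  assert(int32_t(~uint32_t(0xFF)) == -256);
  assert(int64_t(~uint64_t(0xFFFF)) == -65536);
  // Both operands fit in 16 bits, so (a | b) & ~0xFFFF == 0: divw path.
  uint64_t a = 1234, b = 7;
  assert(((a | b) & ~uint64_t(0xFFFF)) == 0);
  return 0;
}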