[X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive numbers
Differential Revision: http://reviews.llvm.org/D5938

llvm-svn: 222521
commit 235268b4ed (parent cb671c0b2c)
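In short: a full 64-bit idivq is expensive on Atom/Silvermont, so when both operands turn out at run time to be small nonnegative values the compiler can branch to a much cheaper 16-bit divide instead. A minimal C++ sketch of the guarded divide this patch enables (illustrative names only, not code from the patch):

#include <cstdint>

// If every bit above bit 15 of both operands is clear, both values are
// nonnegative and below 65536, so a 16-bit divide (divw) yields the same
// result as the slow 64-bit divq.
uint64_t div64_bypass(uint64_t a, uint64_t b) {
  if (((a | b) >> 16) == 0)                      // both fit in 16 bits
    return uint64_t(uint16_t(a) / uint16_t(b));  // fast divw path
  return a / b;                                  // full divq
}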
lib/Target/X86/X86.td:

@@ -167,9 +167,12 @@ def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
                                        "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                     "HasSlowDivide", "true",
-                                     "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                     "HasSlowDivide32", "true",
+                                     "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                     "HasSlowDivide64", "true",
+                                     "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
@@ -234,7 +237,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;

@@ -244,6 +247,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
                       FeatureSSE42, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeaturePOPCNT,
                       FeaturePCLMUL, FeatureAES,
+                      FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeaturePRFCHW,
                       FeatureSlowLEA, FeatureSlowIncDec,
lib/Target/X86/X86ISelLowering.cpp:

@@ -249,9 +249,10 @@ void X86TargetLowering::resetOperationActions() {
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32())
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }
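For context, addBypassSlowDiv(SlowBits, FastBits) records a slow-to-fast bit-width pair on TargetLowering, and an IR-level pass later wraps matching divisions in a runtime operand-width check. A hedged sketch of the control flow for the 32-to-8 pair that Atom keeps (illustrative, not the pass's literal output):

#include <cstdint>

// Mirrors the "testl $-256" guard in the test below: all bits above
// bit 7 of a|b must be clear before taking the 8-bit divb path.
uint32_t div32_bypass(uint32_t a, uint32_t b) {
  if (((a | b) & 0xFFFFFF00u) == 0)            // both fit in 8 bits
    return uint32_t(uint8_t(a) / uint8_t(b));  // fast divb path
  return a / b;                                // full divl
}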
lib/Target/X86/X86Subtarget.cpp:

@@ -267,7 +267,8 @@ void X86Subtarget::initializeEnvironment() {
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
lib/Target/X86/X86Subtarget.h:

@@ -171,9 +171,13 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;

-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;

   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -373,7 +377,8 @@ public:
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
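Taken together, the three hunks above are the usual subtarget-feature plumbing: the TableGen entry ties the -mattr string to a backing field, initializeEnvironment() zero-initializes that field, and the inline accessor is what the lowering code queries.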
test/CodeGen/X86/slow-div.ll (new file, 28 lines):

@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+
+
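A note on the CHECK constants: $-256 is ~0xFF and $-65536 is ~0xFFFF at the tested operand width, so testing a|b against them verifies that no bit above bit 7 (respectively bit 15) is set in either operand; for signed inputs that also guarantees both values are nonnegative, which is what makes the narrow unsigned divide safe. The RUN lines double as a recipe for exercising either feature by hand via -mattr=+idivl-to-divb or -mattr=+idivq-to-divw.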