mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[X86] Change the tuning settings for pentium4 to be more modern since it's the default 32-bit CPU in clang
Alternative to D83897. I believe the big change here is that I removed slow unaligned memory 16. The downside is that it may adversely affect tuning if someone explicitly targets -march=pentium4 and expects pentium4-tuned code. Of course, pentium4 is so old that our default behavior with the previous settings may not have been the best either. Reviewed By: echristo, RKSimon. Differential Revision: https://reviews.llvm.org/D83913
This commit is contained in:
parent
92817d9c2a
commit
ab11c1e42d
@ -1072,10 +1072,20 @@ def : ProcessorModel<"pentium-m", GenericPostRAModel,
|
||||
FeatureCMOV, FeatureInsertVZEROUPPER]>;
|
||||
|
||||
foreach P = ["pentium4", "pentium4m"] in {
|
||||
// def : ProcessorModel<P, GenericPostRAModel,
|
||||
// [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
|
||||
// FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
|
||||
// FeatureCMOV, FeatureInsertVZEROUPPER]>;
|
||||
|
||||
// Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
|
||||
// give it more modern tunings.
|
||||
// FIXME: This wouldn't be needed if we supported mtune.
|
||||
def : ProcessorModel<P, GenericPostRAModel,
|
||||
[FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
|
||||
[FeatureX87, FeatureCMPXCHG8B,
|
||||
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
|
||||
FeatureCMOV, FeatureInsertVZEROUPPER]>;
|
||||
FeatureCMOV, FeatureInsertVZEROUPPER,
|
||||
FeatureSlow3OpsLEA, FeatureSlowDivide64,
|
||||
FeatureSlowIncDec, FeatureMacroFusion]>;
|
||||
}
|
||||
|
||||
// Intel Quark.
|
||||
|
@ -16,19 +16,19 @@ entry:
|
||||
; X32-LABEL: func_cf_vector_x86
|
||||
; X32: movl 12(%ebp), %eax
|
||||
; X32: movl 8(%ebp), %ecx
|
||||
; X32: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero
|
||||
; X32: movsd %xmm4, 24(%esp)
|
||||
; X32: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero
|
||||
; X32: movsd %xmm5, 16(%esp)
|
||||
; X32: movsd (%eax), %xmm6 # xmm6 = mem[0],zero
|
||||
; X32: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero
|
||||
; X32: movsd %xmm7, 8(%esp)
|
||||
; X32: movsd %xmm6, (%esp)
|
||||
; X32: movups (%eax), %xmm0
|
||||
; X32: movups 16(%eax), %xmm1
|
||||
; X32: movaps %xmm0, (%esp)
|
||||
; X32: movaps %xmm1, 16(%esp)
|
||||
; X32: movsd (%esp), %xmm4
|
||||
; X32: movsd 8(%esp), %xmm5
|
||||
; X32: movsd 16(%esp), %xmm6
|
||||
; X32: movsd 24(%esp), %xmm7
|
||||
; X32: calll *___guard_check_icall_fptr
|
||||
; X32: movaps %xmm6, %xmm0
|
||||
; X32: movaps %xmm7, %xmm1
|
||||
; X32: movaps %xmm5, %xmm2
|
||||
; X32: movaps %xmm4, %xmm3
|
||||
; X32: movaps %xmm4, %xmm0
|
||||
; X32: movaps %xmm5, %xmm1
|
||||
; X32: movaps %xmm6, %xmm2
|
||||
; X32: movaps %xmm7, %xmm3
|
||||
; X32: calll *%ecx
|
||||
}
|
||||
attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
|
||||
|
@ -3,8 +3,6 @@
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW
|
||||
@ -14,6 +12,10 @@
|
||||
|
||||
; Intel chips with fast unaligned memory accesses
|
||||
|
||||
; Marked fast because this is the default 32-bit mode CPU in clang.
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST
|
||||
|
@ -40,7 +40,7 @@
|
||||
; OBJ: SubSectionType: FrameData (0xF5)
|
||||
; OBJ: FrameData {
|
||||
; OBJ: RvaStart: 0x0
|
||||
; OBJ: CodeSize: 0x34
|
||||
; OBJ: CodeSize: 0x36
|
||||
; OBJ: PrologSize: 0x9
|
||||
; OBJ: FrameFunc [
|
||||
; OBJ-NEXT: $T0 .raSearch =
|
||||
@ -50,7 +50,7 @@
|
||||
; OBJ: }
|
||||
; OBJ: FrameData {
|
||||
; OBJ: RvaStart: 0x7
|
||||
; OBJ: CodeSize: 0x2D
|
||||
; OBJ: CodeSize: 0x2F
|
||||
; OBJ: PrologSize: 0x2
|
||||
; OBJ: FrameFunc [
|
||||
; OBJ-NEXT: $T0 .raSearch =
|
||||
@ -61,7 +61,7 @@
|
||||
; OBJ: }
|
||||
; OBJ: FrameData {
|
||||
; OBJ: RvaStart: 0x8
|
||||
; OBJ: CodeSize: 0x2C
|
||||
; OBJ: CodeSize: 0x2E
|
||||
; OBJ: PrologSize: 0x1
|
||||
; OBJ: FrameFunc [
|
||||
; OBJ-NEXT: $T0 .raSearch =
|
||||
@ -73,7 +73,7 @@
|
||||
; OBJ: }
|
||||
; OBJ: FrameData {
|
||||
; OBJ: RvaStart: 0x9
|
||||
; OBJ: CodeSize: 0x2B
|
||||
; OBJ: CodeSize: 0x2D
|
||||
; OBJ: PrologSize: 0x0
|
||||
; OBJ: FrameFunc [
|
||||
; OBJ-NEXT: $T0 .raSearch =
|
||||
|
@ -51,7 +51,7 @@
|
||||
; CHECK: PtrParent: 0x0
|
||||
; CHECK: PtrEnd: 0x0
|
||||
; CHECK: PtrNext: 0x0
|
||||
; CHECK: CodeSize: 0x39
|
||||
; CHECK: CodeSize: 0x2A
|
||||
; CHECK: DbgStart: 0x0
|
||||
; CHECK: DbgEnd: 0x0
|
||||
; CHECK: FunctionType: f (0x1002)
|
||||
@ -73,7 +73,7 @@
|
||||
; CHECK: LocalVariableAddrRange {
|
||||
; CHECK: OffsetStart: .text+0x6
|
||||
; CHECK: ISectStart: 0x0
|
||||
; CHECK: Range: 0x33
|
||||
; CHECK: Range: 0x24
|
||||
; CHECK: }
|
||||
; CHECK: }
|
||||
; CHECK: ProcEnd {
|
||||
|
Loading…
Reference in New Issue
Block a user