1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[X86] Change the tuning settings for pentium4 to be more modern since it's the default 32-bit CPU in clang

Alternative to D83897. I believe the big change here is that I removed the slow unaligned 16-byte memory access tuning (FeatureSlowUAMem16).

The downside is that it may adversely affect tuning if someone explicitly targets -march=pentium4 and expects pentium4-tuned code. Of course, pentium4 is so old that our default behavior with the previous settings may not have been the best either.

Reviewed By: echristo, RKSimon

Differential Revision: https://reviews.llvm.org/D83913
This commit is contained in:
Craig Topper 2020-07-16 11:20:57 -07:00
parent 92817d9c2a
commit ab11c1e42d
5 changed files with 34 additions and 22 deletions

View File

@ -1072,10 +1072,20 @@ def : ProcessorModel<"pentium-m", GenericPostRAModel,
FeatureCMOV, FeatureInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
// def : ProcessorModel<P, GenericPostRAModel,
// [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
// FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
// FeatureCMOV, FeatureInsertVZEROUPPER]>;
// Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
// give it more modern tunings.
// FIXME: This wouldn't be needed if we supported mtune.
def : ProcessorModel<P, GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
[FeatureX87, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
FeatureCMOV, FeatureInsertVZEROUPPER]>;
FeatureCMOV, FeatureInsertVZEROUPPER,
FeatureSlow3OpsLEA, FeatureSlowDivide64,
FeatureSlowIncDec, FeatureMacroFusion]>;
}
// Intel Quark.

View File

@ -16,19 +16,19 @@ entry:
; X32-LABEL: func_cf_vector_x86
; X32: movl 12(%ebp), %eax
; X32: movl 8(%ebp), %ecx
; X32: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero
; X32: movsd %xmm4, 24(%esp)
; X32: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero
; X32: movsd %xmm5, 16(%esp)
; X32: movsd (%eax), %xmm6 # xmm6 = mem[0],zero
; X32: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero
; X32: movsd %xmm7, 8(%esp)
; X32: movsd %xmm6, (%esp)
; X32: movups (%eax), %xmm0
; X32: movups 16(%eax), %xmm1
; X32: movaps %xmm0, (%esp)
; X32: movaps %xmm1, 16(%esp)
; X32: movsd (%esp), %xmm4
; X32: movsd 8(%esp), %xmm5
; X32: movsd 16(%esp), %xmm6
; X32: movsd 24(%esp), %xmm7
; X32: calll *___guard_check_icall_fptr
; X32: movaps %xmm6, %xmm0
; X32: movaps %xmm7, %xmm1
; X32: movaps %xmm5, %xmm2
; X32: movaps %xmm4, %xmm3
; X32: movaps %xmm4, %xmm0
; X32: movaps %xmm5, %xmm1
; X32: movaps %xmm6, %xmm2
; X32: movaps %xmm7, %xmm3
; X32: calll *%ecx
}
attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }

View File

@ -3,8 +3,6 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW
@ -14,6 +12,10 @@
; Intel chips with fast unaligned memory accesses
; Marked fast because this is the default 32-bit mode CPU in clang.
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST

View File

@ -40,7 +40,7 @@
; OBJ: SubSectionType: FrameData (0xF5)
; OBJ: FrameData {
; OBJ: RvaStart: 0x0
; OBJ: CodeSize: 0x34
; OBJ: CodeSize: 0x36
; OBJ: PrologSize: 0x9
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
@ -50,7 +50,7 @@
; OBJ: }
; OBJ: FrameData {
; OBJ: RvaStart: 0x7
; OBJ: CodeSize: 0x2D
; OBJ: CodeSize: 0x2F
; OBJ: PrologSize: 0x2
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
@ -61,7 +61,7 @@
; OBJ: }
; OBJ: FrameData {
; OBJ: RvaStart: 0x8
; OBJ: CodeSize: 0x2C
; OBJ: CodeSize: 0x2E
; OBJ: PrologSize: 0x1
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
@ -73,7 +73,7 @@
; OBJ: }
; OBJ: FrameData {
; OBJ: RvaStart: 0x9
; OBJ: CodeSize: 0x2B
; OBJ: CodeSize: 0x2D
; OBJ: PrologSize: 0x0
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =

View File

@ -51,7 +51,7 @@
; CHECK: PtrParent: 0x0
; CHECK: PtrEnd: 0x0
; CHECK: PtrNext: 0x0
; CHECK: CodeSize: 0x39
; CHECK: CodeSize: 0x2A
; CHECK: DbgStart: 0x0
; CHECK: DbgEnd: 0x0
; CHECK: FunctionType: f (0x1002)
@ -73,7 +73,7 @@
; CHECK: LocalVariableAddrRange {
; CHECK: OffsetStart: .text+0x6
; CHECK: ISectStart: 0x0
; CHECK: Range: 0x33
; CHECK: Range: 0x24
; CHECK: }
; CHECK: }
; CHECK: ProcEnd {