From 02871604f516d8a894dbc59ce211a05b9115775f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 6 Nov 2019 18:59:45 +0000 Subject: [PATCH] [X86] Fix SLM v2f64 ADD/MUL + FP BLEND/HADD instruction schedules Noticed while fixing the reduction costs for D59710 - the SLM model doesn't account for the poor throughput of v2f64/v2i64 ops. --- lib/Target/X86/X86ScheduleSLM.td | 14 +++---- test/tools/llvm-mca/X86/SLM/resources-sse2.s | 26 ++++++------ test/tools/llvm-mca/X86/SLM/resources-sse3.s | 42 +++++++++---------- test/tools/llvm-mca/X86/SLM/resources-sse41.s | 18 ++++---- 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 8e3ce721f1a..84aac01ab38 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -202,8 +202,8 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -219,8 +219,8 @@ defm : SLMWriteResPair defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -380,8 +380,8 @@ def : WriteRes { // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -486,7 +486,7 @@ defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; diff --git a/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/test/tools/llvm-mca/X86/SLM/resources-sse2.s index 477a9ce97ec..92f33a20839 100644 --- a/test/tools/llvm-mca/X86/SLM/resources-sse2.s +++ b/test/tools/llvm-mca/X86/SLM/resources-sse2.s @@ -407,8 +407,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * addpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 addpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 2.00 * addpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * addsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2 @@ -510,8 +510,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax) # CHECK-NEXT: 1 3 1.00 * movupd (%rax), %xmm2 -# CHECK-NEXT: 1 5 2.00 mulpd %xmm0, %xmm2 -# CHECK-NEXT: 1 8 2.00 * mulpd (%rax), %xmm2 +# CHECK-NEXT: 1 7 4.00 mulpd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 4.00 * mulpd (%rax), %xmm2 # CHECK-NEXT: 1 5 2.00 mulsd %xmm0, %xmm2 # CHECK-NEXT: 1 8 2.00 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2 @@ -662,8 +662,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2 # CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * subpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 2.00 * subpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * subsd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 ucomisd %xmm0, %xmm1 @@ -687,12 +687,12 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - 412.00 8.00 152.50 86.50 3.00 3.00 134.00 +# CHECK-NEXT: - 412.00 12.00 152.50 90.50 3.00 3.00 134.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: -# CHECK-NEXT: - - - - 1.00 - - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 addpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 2.00 - - - addpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 2.00 - - 1.00 addpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - addsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 addsd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - - andnpd %xmm0, %xmm2 @@ -794,8 +794,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 movupd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 movupd (%rax), %xmm2 -# CHECK-NEXT: - - 2.00 1.00 - - - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 mulpd (%rax), %xmm2 +# CHECK-NEXT: - - 4.00 1.00 - - - - mulpd %xmm0, %xmm2 +# CHECK-NEXT: - - 4.00 1.00 - - - 1.00 mulpd (%rax), %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - - mulsd %xmm0, %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 mulsd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - - orpd %xmm0, %xmm2 @@ -946,8 +946,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - 70.00 - 1.00 - - - 1.00 sqrtpd (%rax), %xmm2 # CHECK-NEXT: - 35.00 - 1.00 - - - - sqrtsd %xmm0, %xmm2 # CHECK-NEXT: - 35.00 - 1.00 - - - 1.00 sqrtsd (%rax), %xmm2 -# CHECK-NEXT: - - - - 1.00 - - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 subpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 2.00 - - - subpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 2.00 - - 1.00 subpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - subsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 subsd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - ucomisd %xmm0, %xmm1 diff --git a/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/test/tools/llvm-mca/X86/SLM/resources-sse3.s index 484c353b07b..23949737b3c 100644 --- a/test/tools/llvm-mca/X86/SLM/resources-sse3.s +++ b/test/tools/llvm-mca/X86/SLM/resources-sse3.s @@ -43,18 +43,18 @@ mwait # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 2.00 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * addsubps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * haddpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 haddps %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * haddps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * hsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * hsubps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 1.00 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 @@ -77,22 +77,22 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 16.00 12.00 - - 10.00 +# CHECK-NEXT: - - - 32.00 30.00 - - 10.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: -# CHECK-NEXT: - - - - 1.00 - - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 addsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 2.00 - - - addsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 2.00 - - 1.00 addsubpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - addsubps %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 addsubps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 haddpd (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 haddps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 hsubpd (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 hsubps (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - haddpd %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddpd (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - haddps %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddps (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - hsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - hsubps %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 lddqu (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - monitor # CHECK-NEXT: - - - 1.00 - - - - movddup %xmm0, %xmm2 diff --git a/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/test/tools/llvm-mca/X86/SLM/resources-sse41.s index ce74a7625e7..0dc83a0fb62 100644 --- a/test/tools/llvm-mca/X86/SLM/resources-sse41.s +++ b/test/tools/llvm-mca/X86/SLM/resources-sse41.s @@ -159,10 +159,10 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 blendps $11, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * blendps $11, (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * blendvpd %xmm0, (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 4 4.00 blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 4 4.00 blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2 @@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 73.00 22.00 - - 54.00 +# CHECK-NEXT: - - - 85.00 22.00 - - 54.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -272,10 +272,10 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 blendpd $11, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - blendps $11, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 blendps $11, (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - 4.00 - - - - blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2