From 84ceaccaeb0e30d12af6c77f67ef6b0dbbee405a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 29 Apr 2020 16:47:10 +0100
Subject: [PATCH] [x86] Enable bypassing 64-bit division on generic x86-64

This is currently enabled for Intel big cores from Sandy Bridge onward,
as well as Atom, Silvermont, and KNL, due to 64-bit division being so
slow on these cores. AMD cores can do this in hardware (use 32-bit
division based on input operand width), so it's not a win there. But
since the majority of x86 CPUs benefit from this optimization, and
since the potential upside is significantly greater than the downside,
we should enable this for the generic x86-64 target.

Patch By: @atdt

Reviewed By: @craig.topper, @RKSimon

Differential Revision: https://reviews.llvm.org/D75567
---
 lib/Target/X86/X86.td                         |  1 +
 test/CodeGen/X86/bypass-slow-division-tune.ll | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index c7990ba5d55..921c7793a6b 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1260,6 +1260,7 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
   FeatureNOPL,
   Feature64Bit,
   FeatureSlow3OpsLEA,
+  FeatureSlowDivide64,
   FeatureSlowIncDec,
   FeatureMacroFusion,
   FeatureInsertVZEROUPPER
diff --git a/test/CodeGen/X86/bypass-slow-division-tune.ll b/test/CodeGen/X86/bypass-slow-division-tune.ll
index 75a00dd03a3..8369a44dcba 100644
--- a/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ b/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -66,9 +66,20 @@ define i64 @div64(i64 %a, i64 %b) {
 ; X64-LABEL: div64:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    orq %rsi, %rcx
+; X64-NEXT:    shrq $32, %rcx
+; X64-NEXT:    je .LBB1_1
+; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    cqto
 ; X64-NEXT:    idivq %rsi
 ; X64-NEXT:    retq
+; X64-NEXT:  .LBB1_1:
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl %esi
+; X64-NEXT:    # kill: def $eax killed $eax def $rax
+; X64-NEXT:    retq
 ;
 ; SLM-LABEL: div64:
 ; SLM:       # %bb.0: # %entry
@@ -178,9 +189,20 @@ define i64 @div64_hugews(i64 %a, i64 %b) {
 ; X64-LABEL: div64_hugews:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    orq %rsi, %rcx
+; X64-NEXT:    shrq $32, %rcx
+; X64-NEXT:    je .LBB4_1
+; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    cqto
 ; X64-NEXT:    idivq %rsi
 ; X64-NEXT:    retq
+; X64-NEXT:  .LBB4_1:
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl %esi
+; X64-NEXT:    # kill: def $eax killed $eax def $rax
+; X64-NEXT:    retq
 ;
 ; SLM-LABEL: div64_hugews:
 ; SLM:       # %bb.0:
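
For readers unfamiliar with the bypass, the fast path checked in the test
updates above is roughly equivalent to the following C sketch. The helper
name div64_bypass is hypothetical and for illustration only; this mirrors
the orq/shrq/je sequence in the generated code, not LLVM's internal
implementation of the transform.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper illustrating the bypass; not LLVM's actual code. */
    static int64_t div64_bypass(int64_t a, int64_t b) {
        /* orq %rsi, %rcx / shrq $32, %rcx / je: one test covers both operands. */
        if ((((uint64_t)a | (uint64_t)b) >> 32) == 0) {
            /* Both values fit in 32 bits (and are therefore non-negative),
               so an unsigned 32-bit divide (divl) yields the same quotient. */
            return (int64_t)((uint32_t)a / (uint32_t)b);
        }
        /* Otherwise fall back to the full-width signed divide (idivq). */
        return a / b;
    }

    int main(void) {
        printf("%lld\n", (long long)div64_bypass(1000000, 7));   /* fast path */
        printf("%lld\n", (long long)div64_bypass(1LL << 40, 3)); /* slow path */
        return 0;
    }

Note that a single or-then-shift test covers both operands at once, so the
common small-operand case pays only a couple of cheap ALU ops plus a
predictable branch before reaching the much faster 32-bit divide.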