mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[ARM] Check for correct HW div when lowering divmod
For subtargets that use the custom lowering for divmod, e.g. gnueabi, we used to check if the subtarget has hardware divide and then lower to a div-mul-sub sequence if true, or to a libcall if false.

However, judging by the usage of hasDivide vs hasDivideInARMMode, it seems that hasDivide only refers to Thumb. For instance, in the ARMTargetLowering constructor, the code that specifies whether to use libcalls for (S|U)DIV looks like this:

    bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
                                          : Subtarget->hasDivideInARMMode();

In the case of divmod for arm-gnueabi, using only hasDivide() to determine what to do means that instead of lowering to __aeabi_idivmod to get the remainder, we lower to div-mul-sub and then further lower the div to __aeabi_idiv.

Even worse, if we have hardware divide in ARM but not in Thumb, we generate a libcall instead of using it (this is not an issue in practice since AFAICT none of the cores that we support have hardware divide in ARM but not Thumb).

This patch fixes the code dealing with custom lowering to take into account the mode (Thumb or ARM) when deciding whether or not hardware division is available.

Differential Revision: https://reviews.llvm.org/D32005

llvm-svn: 300536
This commit is contained in:
parent
507db3264b
commit
93efa79fb7
@ -13052,7 +13052,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
|
||||
// rem = a - b * div
|
||||
// return {div, rem}
|
||||
// This should be lowered into UDIV/SDIV + MLS later on.
|
||||
if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
|
||||
bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
|
||||
: Subtarget->hasDivideInARMMode();
|
||||
if (hasDivide && Op->getValueType(0).isSimple() &&
|
||||
Op->getSimpleValueType(0) == MVT::i32) {
|
||||
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
|
||||
const SDValue Dividend = Op->getOperand(0);
|
||||
|
37
test/CodeGen/ARM/divmod-hwdiv.ll
Normal file
37
test/CodeGen/ARM/divmod-hwdiv.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; The hwdiv subtarget feature should only influence thumb, not arm.
|
||||
; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
|
||||
; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
|
||||
; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,THUMB-HWDIV
|
||||
; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
|
||||
|
||||
; The hwdiv-arm subtarget feature should only influence arm, not thumb.
|
||||
; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,ARM-HWDIV
|
||||
; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
|
||||
; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
|
||||
; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV
|
||||
|
||||
define arm_aapcscc i32 @test_i32_srem(i32 %x, i32 %y) {
|
||||
; ALL-LABEL: test_i32_srem:
|
||||
; ARM-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
|
||||
; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
|
||||
; ARM-HWDIV: sub r0, r0, [[P]]
|
||||
; THUMB-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
|
||||
; THUMB-HWDIV: mls r0, [[Q]], r1, r0
|
||||
; AEABI-NOHWDIV: bl __aeabi_idivmod
|
||||
; AEABI-NOHWDIV: mov r0, r1
|
||||
%r = srem i32 %x, %y
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define arm_aapcscc i32 @test_i32_urem(i32 %x, i32 %y) {
|
||||
; ALL-LABEL: test_i32_urem:
|
||||
; ARM-HWDIV: udiv [[Q:r[0-9]+]], r0, r1
|
||||
; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
|
||||
; ARM-HWDIV: sub r0, r0, [[P]]
|
||||
; THUMB-HWDIV: udiv [[Q:r[0-9]+]], r0, r1
|
||||
; THUMB-HWDIV: mls r0, [[Q]], r1, r0
|
||||
; AEABI-NOHWDIV: bl __aeabi_uidivmod
|
||||
; AEABI-NOHWDIV: mov r0, r1
|
||||
%r = urem i32 %x, %y
|
||||
ret i32 %r
|
||||
}
|
Loading…
Reference in New Issue
Block a user