mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
ARM: fix vectorized division on WoA
The Windows on ARM target uses a custom lowering for normal division because the backend needs to insert division-by-zero checks. However, that lowering is designed to handle only non-vectorized division. ARM has its own custom lowering for vectorized division, which prefers NEON instructions and so avoids loading registers with the values and invoking a division routine for each one. Fall back to that NEON-based custom lowering if we encounter a vectorized division. Resolves PR31778! llvm-svn: 293259
This commit is contained in:
parent
39c1af708a
commit
da5a339f73
@ -7571,11 +7571,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||||
case ISD::MUL: return LowerMUL(Op, DAG);
|
case ISD::MUL: return LowerMUL(Op, DAG);
|
||||||
case ISD::SDIV:
|
case ISD::SDIV:
|
||||||
if (Subtarget->isTargetWindows())
|
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
|
||||||
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
|
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
|
||||||
return LowerSDIV(Op, DAG);
|
return LowerSDIV(Op, DAG);
|
||||||
case ISD::UDIV:
|
case ISD::UDIV:
|
||||||
if (Subtarget->isTargetWindows())
|
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
|
||||||
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
|
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
|
||||||
return LowerUDIV(Op, DAG);
|
return LowerUDIV(Op, DAG);
|
||||||
case ISD::ADDC:
|
case ISD::ADDC:
|
||||||
|
@ -1,49 +1,58 @@
|
|||||||
; RUN: llc -mtriple=arm-eabi -mattr=+neon -pre-RA-sched=source -disable-post-ra %s -o - \
|
; RUN: llc -mtriple arm-eabi -mattr=+neon -disable-post-ra -pre-RA-sched source %s -o - | FileCheck %s
|
||||||
; RUN: | FileCheck %s
|
; RUN: llc -mtriple thumbv7-windows-itanium -mattr=+neon -disable-post-ra -pre-RA-sched source %s -o - | FileCheck %s
|
||||||
|
|
||||||
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||||
;CHECK: vrecpe.f32
|
%tmp1 = load <8 x i8>, <8 x i8>* %A
|
||||||
;CHECK: vmovn.i32
|
%tmp2 = load <8 x i8>, <8 x i8>* %B
|
||||||
;CHECK: vrecpe.f32
|
%tmp3 = sdiv <8 x i8> %tmp1, %tmp2
|
||||||
;CHECK: vmovn.i32
|
ret <8 x i8> %tmp3
|
||||||
;CHECK: vmovn.i16
|
|
||||||
%tmp1 = load <8 x i8>, <8 x i8>* %A
|
|
||||||
%tmp2 = load <8 x i8>, <8 x i8>* %B
|
|
||||||
%tmp3 = sdiv <8 x i8> %tmp1, %tmp2
|
|
||||||
ret <8 x i8> %tmp3
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: sdivi8:
|
||||||
|
; CHECK: vrecpe.f32
|
||||||
|
; CHECK: vmovn.i32
|
||||||
|
; CHECK: vrecpe.f32
|
||||||
|
; CHECK: vmovn.i32
|
||||||
|
; CHECK: vmovn.i16
|
||||||
|
|
||||||
define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||||
;CHECK: vrecpe.f32
|
%tmp1 = load <8 x i8>, <8 x i8>* %A
|
||||||
;CHECK: vrecps.f32
|
%tmp2 = load <8 x i8>, <8 x i8>* %B
|
||||||
;CHECK: vmovn.i32
|
%tmp3 = udiv <8 x i8> %tmp1, %tmp2
|
||||||
;CHECK: vrecpe.f32
|
ret <8 x i8> %tmp3
|
||||||
;CHECK: vrecps.f32
|
|
||||||
;CHECK: vmovn.i32
|
|
||||||
;CHECK: vqmovun.s16
|
|
||||||
%tmp1 = load <8 x i8>, <8 x i8>* %A
|
|
||||||
%tmp2 = load <8 x i8>, <8 x i8>* %B
|
|
||||||
%tmp3 = udiv <8 x i8> %tmp1, %tmp2
|
|
||||||
ret <8 x i8> %tmp3
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: udivi8:
|
||||||
|
; CHECK: vrecpe.f32
|
||||||
|
; CHECK: vrecps.f32
|
||||||
|
; CHECK: vmovn.i32
|
||||||
|
; CHECK: vrecpe.f32
|
||||||
|
; CHECK: vrecps.f32
|
||||||
|
; CHECK: vmovn.i32
|
||||||
|
; CHECK: vqmovun.s16
|
||||||
|
|
||||||
define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||||
;CHECK: vrecpe.f32
|
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
||||||
;CHECK: vrecps.f32
|
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
||||||
;CHECK: vmovn.i32
|
%tmp3 = sdiv <4 x i16> %tmp1, %tmp2
|
||||||
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
ret <4 x i16> %tmp3
|
||||||
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
|
||||||
%tmp3 = sdiv <4 x i16> %tmp1, %tmp2
|
|
||||||
ret <4 x i16> %tmp3
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: sdivi16:
|
||||||
|
; CHECK: vrecpe.f32
|
||||||
|
; CHECK: vrecps.f32
|
||||||
|
; CHECK: vmovn.i32
|
||||||
|
|
||||||
define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||||
;CHECK: vrecpe.f32
|
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
||||||
;CHECK: vrecps.f32
|
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
||||||
;CHECK: vrecps.f32
|
%tmp3 = udiv <4 x i16> %tmp1, %tmp2
|
||||||
;CHECK: vmovn.i32
|
ret <4 x i16> %tmp3
|
||||||
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
|
||||||
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
|
||||||
%tmp3 = udiv <4 x i16> %tmp1, %tmp2
|
|
||||||
ret <4 x i16> %tmp3
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: udivi16:
|
||||||
|
; CHECK: vrecpe.f32
|
||||||
|
; CHECK: vrecps.f32
|
||||||
|
; CHECK: vrecps.f32
|
||||||
|
; CHECK: vmovn.i32
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user