mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[Intrinsic] Add fixed point saturating division intrinsics.
Summary: This patch adds intrinsics and ISelDAG nodes for signed and unsigned fixed-point division: `llvm.sdiv.fix.sat.*` and `llvm.udiv.fix.sat.*`. These intrinsics perform scaled, saturating division on two integers or vectors of integers. They are required for the implementation of the Embedded-C fixed-point arithmetic in Clang. Reviewers: bjope, leonardchan, craig.topper Subscribers: hiraditya, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71550
This commit is contained in:
parent
27f50511fb
commit
4f8b0d2f56
130
docs/LangRef.rst
130
docs/LangRef.rst
@ -14331,6 +14331,136 @@ Examples
|
||||
%res = call i4 @llvm.udiv.fix.i4(i4 3, i4 4, i32 1) ; %res = 2 (or 1) (1.5 / 2 = 0.75)
|
||||
|
||||
|
||||
'``llvm.sdiv.fix.sat.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax
|
||||
"""""""
|
||||
|
||||
This is an overloaded intrinsic. You can use ``llvm.sdiv.fix.sat``
|
||||
on any integer bit width or vectors of integers.
|
||||
|
||||
::
|
||||
|
||||
declare i16 @llvm.sdiv.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
|
||||
declare i32 @llvm.sdiv.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
|
||||
declare i64 @llvm.sdiv.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
|
||||
declare <4 x i32> @llvm.sdiv.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
|
||||
|
||||
Overview
|
||||
"""""""""
|
||||
|
||||
The '``llvm.sdiv.fix.sat``' family of intrinsic functions performs signed
|
||||
fixed point saturating division on 2 arguments of the same scale.
|
||||
|
||||
Arguments
|
||||
""""""""""
|
||||
|
||||
The arguments (``%a`` and ``%b``) and the result may be of integer types of any bit
|
||||
width, but they must have the same bit width. ``%a`` and ``%b`` are the two
|
||||
values that will undergo signed fixed point division. The argument
|
||||
``%scale`` represents the scale of both operands, and must be a constant
|
||||
integer.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
This operation performs fixed point division on the 2 arguments of a
|
||||
specified scale. The result will also be returned in the same scale specified
|
||||
in the third argument.
|
||||
|
||||
If the result value cannot be precisely represented in the given scale, the
|
||||
value is rounded up or down to the closest representable value. The rounding
|
||||
direction is unspecified.
|
||||
|
||||
The maximum value this operation can clamp to is the largest signed value
|
||||
representable by the bit width of the first 2 arguments. The minimum value is the
|
||||
smallest signed value representable by this bit width.
|
||||
|
||||
It is undefined behavior if the second argument is zero.
|
||||
|
||||
|
||||
Examples
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 6, i4 2, i32 0) ; %res = 3 (6 / 2 = 3)
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 6, i4 4, i32 1) ; %res = 3 (3 / 2 = 1.5)
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 3, i4 -2, i32 1) ; %res = -3 (1.5 / -1 = -1.5)
|
||||
|
||||
; The result in the following could be rounded up to 1 or down to 0.5
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 3, i4 4, i32 1) ; %res = 2 (or 1) (1.5 / 2 = 0.75)
|
||||
|
||||
; Saturation
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 -8, i4 -1, i32 0) ; %res = 7 (-8 / -1 = 8 => 7)
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 4, i4 2, i32 2) ; %res = 7 (1 / 0.5 = 2 => 1.75)
|
||||
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 -4, i4 1, i32 2) ; %res = -8 (-1 / 0.25 = -4 => -2)
|
||||
|
||||
|
||||
'``llvm.udiv.fix.sat.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax
|
||||
"""""""
|
||||
|
||||
This is an overloaded intrinsic. You can use ``llvm.udiv.fix.sat``
|
||||
on any integer bit width or vectors of integers.
|
||||
|
||||
::
|
||||
|
||||
declare i16 @llvm.udiv.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
|
||||
declare i32 @llvm.udiv.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
|
||||
declare i64 @llvm.udiv.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
|
||||
declare <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
|
||||
|
||||
Overview
|
||||
"""""""""
|
||||
|
||||
The '``llvm.udiv.fix.sat``' family of intrinsic functions performs unsigned
|
||||
fixed point saturating division on 2 arguments of the same scale.
|
||||
|
||||
Arguments
|
||||
""""""""""
|
||||
|
||||
The arguments (``%a`` and ``%b``) and the result may be of integer types of any bit
|
||||
width, but they must have the same bit width. ``%a`` and ``%b`` are the two
|
||||
values that will undergo unsigned fixed point division. The argument
|
||||
``%scale`` represents the scale of both operands, and must be a constant
|
||||
integer.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
This operation performs fixed point division on the 2 arguments of a
|
||||
specified scale. The result will also be returned in the same scale specified
|
||||
in the third argument.
|
||||
|
||||
If the result value cannot be precisely represented in the given scale, the
|
||||
value is rounded up or down to the closest representable value. The rounding
|
||||
direction is unspecified.
|
||||
|
||||
The maximum value this operation can clamp to is the largest unsigned value
|
||||
representable by the bit width of the first 2 arguments. The minimum value is the
|
||||
smallest unsigned value representable by this bit width (zero).
|
||||
|
||||
It is undefined behavior if the second argument is zero.
|
||||
|
||||
Examples
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%res = call i4 @llvm.udiv.fix.sat.i4(i4 6, i4 2, i32 0) ; %res = 3 (6 / 2 = 3)
|
||||
%res = call i4 @llvm.udiv.fix.sat.i4(i4 6, i4 4, i32 1) ; %res = 3 (3 / 2 = 1.5)
|
||||
|
||||
; The result in the following could be rounded down to 0.5 or up to 1
|
||||
%res = call i4 @llvm.udiv.fix.sat.i4(i4 3, i4 4, i32 1) ; %res = 1 (or 2) (1.5 / 2 = 0.75)
|
||||
|
||||
; Saturation
|
||||
%res = call i4 @llvm.udiv.fix.sat.i4(i4 8, i4 2, i32 2) ; %res = 15 (2 / 0.5 = 4 => 3.75)
|
||||
|
||||
|
||||
Specialised Arithmetic Intrinsics
|
||||
---------------------------------
|
||||
|
||||
|
@ -291,6 +291,11 @@ namespace ISD {
|
||||
/// constant integer.
|
||||
SDIVFIX, UDIVFIX,
|
||||
|
||||
/// Same as the corresponding unsaturated fixed point instructions, but the
|
||||
/// result is clamped between the min and max values representable by the
|
||||
/// bits of the first 2 operands.
|
||||
SDIVFIXSAT, UDIVFIXSAT,
|
||||
|
||||
/// Simple binary floating point operators.
|
||||
FADD, FSUB, FMUL, FDIV, FREM,
|
||||
|
||||
|
@ -1043,7 +1043,9 @@ public:
|
||||
case ISD::UMULFIX:
|
||||
case ISD::UMULFIXSAT:
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT:
|
||||
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
|
||||
break;
|
||||
}
|
||||
@ -4269,7 +4271,7 @@ public:
|
||||
/// method accepts integers as its arguments.
|
||||
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
|
||||
|
||||
/// Method for building the DAG expansion of ISD::[US]DIVFIX. This
|
||||
/// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
|
||||
/// method accepts integers as its arguments.
|
||||
/// Note: This method may fail if the division could not be performed
|
||||
/// within the type. Clients must retry with a wider type if this happens.
|
||||
|
@ -969,6 +969,14 @@ def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
|
||||
|
||||
def int_sdiv_fix_sat : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<2>]>;
|
||||
|
||||
def int_udiv_fix_sat : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<2>]>;
|
||||
|
||||
//===------------------------- Memory Use Markers -------------------------===//
|
||||
//
|
||||
def int_lifetime_start : Intrinsic<[],
|
||||
|
@ -402,7 +402,9 @@ def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>
|
||||
def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
|
||||
def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
|
||||
def sdivfix : SDNode<"ISD::SDIVFIX" , SDTIntScaledBinOp>;
|
||||
def sdivfixsat : SDNode<"ISD::SDIVFIXSAT", SDTIntScaledBinOp>;
|
||||
def udivfix : SDNode<"ISD::UDIVFIX" , SDTIntScaledBinOp>;
|
||||
def udivfixsat : SDNode<"ISD::UDIVFIXSAT", SDTIntScaledBinOp>;
|
||||
|
||||
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
|
||||
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
|
||||
|
@ -1132,7 +1132,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
||||
case ISD::UMULFIX:
|
||||
case ISD::UMULFIXSAT:
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::UDIVFIX: {
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT: {
|
||||
unsigned Scale = Node->getConstantOperandVal(2);
|
||||
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
|
||||
Node->getValueType(0), Scale);
|
||||
@ -3489,7 +3491,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
|
||||
break;
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT:
|
||||
if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
|
||||
Node->getOperand(0),
|
||||
Node->getOperand(1),
|
||||
|
@ -162,7 +162,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
|
||||
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break;
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
|
||||
|
||||
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
|
||||
|
||||
@ -784,22 +786,51 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
|
||||
N->getOperand(2));
|
||||
}
|
||||
|
||||
static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
|
||||
unsigned SatW, bool Signed,
|
||||
const TargetLowering &TLI,
|
||||
SelectionDAG &DAG) {
|
||||
EVT VT = V.getValueType();
|
||||
unsigned VTW = VT.getScalarSizeInBits();
|
||||
|
||||
if (!Signed) {
|
||||
// Saturate to the unsigned maximum by getting the minimum of V and the
|
||||
// maximum.
|
||||
return DAG.getNode(ISD::UMIN, dl, VT, V,
|
||||
DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
|
||||
dl, VT));
|
||||
}
|
||||
|
||||
// Saturate to the signed maximum (the low SatW - 1 bits) by taking the
|
||||
// signed minimum of it and V.
|
||||
V = DAG.getNode(ISD::SMIN, dl, VT, V,
|
||||
DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
|
||||
dl, VT));
|
||||
// Saturate to the signed minimum (the high VTW - SatW + 1 bits) by taking the
|
||||
// signed maximum of it and V.
|
||||
V = DAG.getNode(ISD::SMAX, dl, VT, V,
|
||||
DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
|
||||
dl, VT));
|
||||
return V;
|
||||
}
|
||||
|
||||
static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
|
||||
unsigned Scale, const TargetLowering &TLI,
|
||||
SelectionDAG &DAG) {
|
||||
unsigned Scale, const TargetLowering &TLI,
|
||||
SelectionDAG &DAG, unsigned SatW = 0) {
|
||||
EVT VT = LHS.getValueType();
|
||||
bool Signed = N->getOpcode() == ISD::SDIVFIX;
|
||||
unsigned VTSize = VT.getScalarSizeInBits();
|
||||
bool Signed = N->getOpcode() == ISD::SDIVFIX ||
|
||||
N->getOpcode() == ISD::SDIVFIXSAT;
|
||||
bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
|
||||
N->getOpcode() == ISD::UDIVFIXSAT;
|
||||
|
||||
SDLoc dl(N);
|
||||
// See if we can perform the division in this type without widening.
|
||||
if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
|
||||
DAG))
|
||||
return V;
|
||||
|
||||
// If that didn't work, double the type width and try again. That must work,
|
||||
// or something is wrong.
|
||||
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(),
|
||||
VT.getScalarSizeInBits() * 2);
|
||||
// Widen the types by a factor of two. This is guaranteed to expand, since it
|
||||
// will always have enough high bits in the LHS to shift into.
|
||||
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
|
||||
if (VT.isVector())
|
||||
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
|
||||
VT.getVectorElementCount());
|
||||
if (Signed) {
|
||||
LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
|
||||
RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
|
||||
@ -808,18 +839,28 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
|
||||
RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
|
||||
}
|
||||
|
||||
// TODO: Saturation.
|
||||
|
||||
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
|
||||
DAG);
|
||||
assert(Res && "Expanding DIVFIX with wide type failed?");
|
||||
if (Saturating) {
|
||||
// If the caller has told us to saturate at something less, use that width
|
||||
// instead of the type before doubling. However, it cannot be more than
|
||||
// what we just widened!
|
||||
assert(SatW <= VTSize &&
|
||||
"Tried to saturate to more than the original type?");
|
||||
Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
|
||||
TLI, DAG);
|
||||
}
|
||||
return DAG.getZExtOrTrunc(Res, dl, VT);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
|
||||
SDLoc dl(N);
|
||||
SDValue Op1Promoted, Op2Promoted;
|
||||
bool Signed = N->getOpcode() == ISD::SDIVFIX;
|
||||
bool Signed = N->getOpcode() == ISD::SDIVFIX ||
|
||||
N->getOpcode() == ISD::SDIVFIXSAT;
|
||||
bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
|
||||
N->getOpcode() == ISD::UDIVFIXSAT;
|
||||
if (Signed) {
|
||||
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
|
||||
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
|
||||
@ -830,23 +871,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
|
||||
EVT PromotedType = Op1Promoted.getValueType();
|
||||
unsigned Scale = N->getConstantOperandVal(2);
|
||||
|
||||
SDValue Res;
|
||||
// If the type is already legal and the operation is legal in that type, we
|
||||
// should not early expand.
|
||||
if (TLI.isTypeLegal(PromotedType)) {
|
||||
TargetLowering::LegalizeAction Action =
|
||||
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
|
||||
if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
|
||||
Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
|
||||
Op2Promoted, N->getOperand(2));
|
||||
if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
|
||||
EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
|
||||
unsigned Diff = PromotedType.getScalarSizeInBits() -
|
||||
N->getValueType(0).getScalarSizeInBits();
|
||||
if (Saturating)
|
||||
Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
|
||||
DAG.getConstant(Diff, dl, ShiftTy));
|
||||
SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
|
||||
Op2Promoted, N->getOperand(2));
|
||||
if (Saturating)
|
||||
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
|
||||
DAG.getConstant(Diff, dl, ShiftTy));
|
||||
return Res;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Res)
|
||||
Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG);
|
||||
|
||||
// TODO: Saturation.
|
||||
|
||||
return Res;
|
||||
// See if we can perform the division in this type without expanding.
|
||||
if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
|
||||
Op2Promoted, Scale, DAG)) {
|
||||
if (Saturating)
|
||||
Res = SaturateWidenedDIVFIX(Res, dl,
|
||||
N->getValueType(0).getScalarSizeInBits(),
|
||||
Signed, TLI, DAG);
|
||||
return Res;
|
||||
}
|
||||
// If we cannot, expand it to twice the type width. If we are saturating, give
|
||||
// it the original width as a saturating width so we don't need to emit
|
||||
// two saturations.
|
||||
return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
|
||||
N->getValueType(0).getScalarSizeInBits());
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
|
||||
@ -1315,7 +1374,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::UMULFIX:
|
||||
case ISD::UMULFIXSAT:
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break;
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
|
||||
|
||||
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
|
||||
|
||||
@ -1923,7 +1984,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
|
||||
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
|
||||
|
||||
case ISD::VECREDUCE_ADD:
|
||||
case ISD::VECREDUCE_MUL:
|
||||
@ -3253,8 +3316,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
|
||||
|
||||
void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
|
||||
SDValue &Hi) {
|
||||
SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
|
||||
N->getConstantOperandVal(2), TLI, DAG);
|
||||
SDLoc dl(N);
|
||||
// Try expanding in the existing type first.
|
||||
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
|
||||
N->getOperand(1),
|
||||
N->getConstantOperandVal(2), DAG);
|
||||
|
||||
if (!Res)
|
||||
Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
|
||||
N->getConstantOperandVal(2), TLI, DAG);
|
||||
SplitInteger(Res, Lo, Hi);
|
||||
}
|
||||
|
||||
|
@ -142,7 +142,7 @@ class VectorLegalizer {
|
||||
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
SDValue ExpandFixedPointDiv(SDNode *Node);
|
||||
void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
SDValue ExpandStrictFPOp(SDNode *Node);
|
||||
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
|
||||
|
||||
@ -463,7 +463,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case ISD::UMULFIX:
|
||||
case ISD::UMULFIXSAT:
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::UDIVFIX: {
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT: {
|
||||
unsigned Scale = Node->getConstantOperandVal(2);
|
||||
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
|
||||
Node->getValueType(0), Scale);
|
||||
@ -968,8 +970,11 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
|
||||
break;
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::UDIVFIX:
|
||||
Results.push_back(ExpandFixedPointDiv(Node));
|
||||
ExpandFixedPointDiv(Node, Results);
|
||||
return;
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIXSAT:
|
||||
break;
|
||||
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
|
||||
case ISD::STRICT_##DAGN:
|
||||
#include "llvm/IR/ConstrainedOps.def"
|
||||
@ -1454,12 +1459,12 @@ void VectorLegalizer::ExpandMULO(SDNode *Node,
|
||||
Results.push_back(Overflow);
|
||||
}
|
||||
|
||||
SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) {
|
||||
void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
|
||||
SmallVectorImpl<SDValue> &Results) {
|
||||
SDNode *N = Node;
|
||||
if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
|
||||
N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
|
||||
return Expanded;
|
||||
return DAG.UnrollVectorOp(N);
|
||||
Results.push_back(Expanded);
|
||||
}
|
||||
|
||||
void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
|
||||
|
@ -166,7 +166,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::UMULFIX:
|
||||
case ISD::UMULFIXSAT:
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT:
|
||||
R = ScalarizeVecRes_FIX(N);
|
||||
break;
|
||||
}
|
||||
@ -956,7 +958,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::UMULFIX:
|
||||
case ISD::UMULFIXSAT:
|
||||
case ISD::SDIVFIX:
|
||||
case ISD::SDIVFIXSAT:
|
||||
case ISD::UDIVFIX:
|
||||
case ISD::UDIVFIXSAT:
|
||||
SplitVecRes_FIX(N, Lo, Hi);
|
||||
break;
|
||||
}
|
||||
|
@ -5451,7 +5451,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
|
||||
SDValue LHS, SDValue RHS, SDValue Scale,
|
||||
SelectionDAG &DAG, const TargetLowering &TLI) {
|
||||
EVT VT = LHS.getValueType();
|
||||
bool Signed = Opcode == ISD::SDIVFIX;
|
||||
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
|
||||
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
|
||||
LLVMContext &Ctx = *DAG.getContext();
|
||||
|
||||
// If the type is legal but the operation isn't, this node might survive all
|
||||
@ -5463,14 +5464,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
|
||||
// by bumping the size by one bit. This will force it to Promote, enabling the
|
||||
// early expansion and avoiding the need to expand later.
|
||||
|
||||
// We don't have to do this if Scale is 0; that can always be expanded.
|
||||
// We don't have to do this if Scale is 0; that can always be expanded, unless
|
||||
// it's a saturating signed operation. Those can experience true integer
|
||||
// division overflow, a case which we must avoid.
|
||||
|
||||
// FIXME: We wouldn't have to do this (or any of the early
|
||||
// expansion/promotion) if it was possible to expand a libcall of an
|
||||
// illegal type during operation legalization. But it's not, so things
|
||||
// get a bit hacky.
|
||||
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
|
||||
if (ScaleInt > 0 &&
|
||||
if ((ScaleInt > 0 || (Saturating && Signed)) &&
|
||||
(TLI.isTypeLegal(VT) ||
|
||||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
|
||||
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
|
||||
@ -5492,8 +5495,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
|
||||
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
|
||||
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
|
||||
}
|
||||
// TODO: Saturation.
|
||||
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
|
||||
// For saturating operations, we need to shift up the LHS to get the
|
||||
// proper saturation width, and then shift down again afterwards.
|
||||
if (Saturating)
|
||||
LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
|
||||
DAG.getConstant(1, DL, ShiftTy));
|
||||
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
|
||||
if (Saturating)
|
||||
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
|
||||
DAG.getConstant(1, DL, ShiftTy));
|
||||
return DAG.getZExtOrTrunc(Res, DL, VT);
|
||||
}
|
||||
}
|
||||
@ -5757,6 +5768,10 @@ static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
|
||||
return ISD::SDIVFIX;
|
||||
case Intrinsic::udiv_fix:
|
||||
return ISD::UDIVFIX;
|
||||
case Intrinsic::sdiv_fix_sat:
|
||||
return ISD::SDIVFIXSAT;
|
||||
case Intrinsic::udiv_fix_sat:
|
||||
return ISD::UDIVFIXSAT;
|
||||
default:
|
||||
llvm_unreachable("Unhandled fixed point intrinsic");
|
||||
}
|
||||
@ -6460,7 +6475,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
return;
|
||||
}
|
||||
case Intrinsic::sdiv_fix:
|
||||
case Intrinsic::udiv_fix: {
|
||||
case Intrinsic::udiv_fix:
|
||||
case Intrinsic::sdiv_fix_sat:
|
||||
case Intrinsic::udiv_fix_sat: {
|
||||
SDValue Op1 = getValue(I.getArgOperand(0));
|
||||
SDValue Op2 = getValue(I.getArgOperand(1));
|
||||
SDValue Op3 = getValue(I.getArgOperand(2));
|
||||
|
@ -314,7 +314,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::UMULFIXSAT: return "umulfixsat";
|
||||
|
||||
case ISD::SDIVFIX: return "sdivfix";
|
||||
case ISD::SDIVFIXSAT: return "sdivfixsat";
|
||||
case ISD::UDIVFIX: return "udivfix";
|
||||
case ISD::UDIVFIXSAT: return "udivfixsat";
|
||||
|
||||
// Conversion operators.
|
||||
case ISD::SIGN_EXTEND: return "sign_extend";
|
||||
|
@ -7332,12 +7332,13 @@ SDValue
|
||||
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
|
||||
SDValue LHS, SDValue RHS,
|
||||
unsigned Scale, SelectionDAG &DAG) const {
|
||||
assert((Opcode == ISD::SDIVFIX ||
|
||||
Opcode == ISD::UDIVFIX) &&
|
||||
assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
|
||||
Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
|
||||
"Expected a fixed point division opcode");
|
||||
|
||||
EVT VT = LHS.getValueType();
|
||||
bool Signed = Opcode == ISD::SDIVFIX;
|
||||
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
|
||||
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
|
||||
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
|
||||
|
||||
// If there is enough room in the type to upscale the LHS or downscale the
|
||||
@ -7349,7 +7350,15 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
|
||||
: DAG.computeKnownBits(LHS).countMinLeadingZeros();
|
||||
unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
|
||||
|
||||
if (LHSLead + RHSTrail < Scale)
|
||||
// For signed saturating operations, we need to be able to detect true integer
|
||||
// division overflow; that is, when you have MIN / -EPS. However, this
|
||||
// is undefined behavior and if we emit divisions that could take such
|
||||
// values it may cause undesired behavior (arithmetic exceptions on x86, for
|
||||
// example).
|
||||
// Avoid this by requiring an extra bit so that we never get this case.
|
||||
// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
|
||||
// signed saturating division, we need to emit a whopping 32-bit division.
|
||||
if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
|
||||
return SDValue();
|
||||
|
||||
unsigned LHSShift = std::min(LHSLead, Scale);
|
||||
@ -7403,8 +7412,6 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
|
||||
Quot = DAG.getNode(ISD::UDIV, dl, VT,
|
||||
LHS, RHS);
|
||||
|
||||
// TODO: Saturation.
|
||||
|
||||
return Quot;
|
||||
}
|
||||
|
||||
|
@ -660,7 +660,9 @@ void TargetLoweringBase::initActions() {
|
||||
setOperationAction(ISD::UMULFIX, VT, Expand);
|
||||
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
|
||||
setOperationAction(ISD::SDIVFIX, VT, Expand);
|
||||
setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
|
||||
setOperationAction(ISD::UDIVFIX, VT, Expand);
|
||||
setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
|
||||
|
||||
// Overflow operations default to expand
|
||||
setOperationAction(ISD::SADDO, VT, Expand);
|
||||
|
@ -4727,7 +4727,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
|
||||
case Intrinsic::umul_fix:
|
||||
case Intrinsic::umul_fix_sat:
|
||||
case Intrinsic::sdiv_fix:
|
||||
case Intrinsic::udiv_fix: {
|
||||
case Intrinsic::sdiv_fix_sat:
|
||||
case Intrinsic::udiv_fix:
|
||||
case Intrinsic::udiv_fix_sat: {
|
||||
Value *Op1 = Call.getArgOperand(0);
|
||||
Value *Op2 = Call.getArgOperand(1);
|
||||
Assert(Op1->getType()->isIntOrIntVectorTy(),
|
||||
@ -4742,7 +4744,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
|
||||
"third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
|
||||
|
||||
if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat ||
|
||||
ID == Intrinsic::sdiv_fix) {
|
||||
ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) {
|
||||
Assert(
|
||||
Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
|
||||
"the scale of s[mul|div]_fix[_sat] must be less than the width of "
|
||||
|
1411
test/CodeGen/X86/sdiv_fix_sat.ll
Normal file
1411
test/CodeGen/X86/sdiv_fix_sat.ll
Normal file
File diff suppressed because it is too large
Load Diff
528
test/CodeGen/X86/udiv_fix_sat.ll
Normal file
528
test/CodeGen/X86/udiv_fix_sat.ll
Normal file
@ -0,0 +1,528 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
|
||||
|
||||
declare i4 @llvm.udiv.fix.sat.i4 (i4, i4, i32)
|
||||
declare i15 @llvm.udiv.fix.sat.i15 (i15, i15, i32)
|
||||
declare i16 @llvm.udiv.fix.sat.i16 (i16, i16, i32)
|
||||
declare i18 @llvm.udiv.fix.sat.i18 (i18, i18, i32)
|
||||
declare i64 @llvm.udiv.fix.sat.i64 (i64, i64, i32)
|
||||
declare <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
|
||||
|
||||
define i16 @func(i16 %x, i16 %y) nounwind {
|
||||
; X64-LABEL: func:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movzwl %si, %ecx
|
||||
; X64-NEXT: movzwl %di, %eax
|
||||
; X64-NEXT: shll $8, %eax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divl %ecx
|
||||
; X64-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
|
||||
; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
|
||||
; X64-NEXT: cmovael %ecx, %eax
|
||||
; X64-NEXT: shrl %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movzwl %ax, %eax
|
||||
; X86-NEXT: shll $8, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: divl %ecx
|
||||
; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
|
||||
; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
|
||||
; X86-NEXT: cmovael %ecx, %eax
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i16 @llvm.udiv.fix.sat.i16(i16 %x, i16 %y, i32 7)
|
||||
ret i16 %tmp
|
||||
}
|
||||
|
||||
define i16 @func2(i8 %x, i8 %y) nounwind {
|
||||
; X64-LABEL: func2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movsbl %dil, %eax
|
||||
; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
|
||||
; X64-NEXT: movsbl %sil, %ecx
|
||||
; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
|
||||
; X64-NEXT: shll $14, %eax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divl %ecx
|
||||
; X64-NEXT: cmpl $32767, %eax # imm = 0x7FFF
|
||||
; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
|
||||
; X64-NEXT: cmovbl %eax, %ecx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: movswl %cx, %eax
|
||||
; X64-NEXT: shrl %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
|
||||
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
|
||||
; X86-NEXT: shll $14, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: divl %ecx
|
||||
; X86-NEXT: cmpl $32767, %eax # imm = 0x7FFF
|
||||
; X86-NEXT: movl $32767, %ecx # imm = 0x7FFF
|
||||
; X86-NEXT: cmovbl %eax, %ecx
|
||||
; X86-NEXT: addl %ecx, %ecx
|
||||
; X86-NEXT: movswl %cx, %eax
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
%x2 = sext i8 %x to i15
|
||||
%y2 = sext i8 %y to i15
|
||||
%tmp = call i15 @llvm.udiv.fix.sat.i15(i15 %x2, i15 %y2, i32 14)
|
||||
%tmp2 = sext i15 %tmp to i16
|
||||
ret i16 %tmp2
|
||||
}
|
||||
|
||||
define i16 @func3(i15 %x, i8 %y) nounwind {
|
||||
; X64-LABEL: func3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-NEXT: leal (%rdi,%rdi), %eax
|
||||
; X64-NEXT: movzbl %sil, %ecx
|
||||
; X64-NEXT: shll $4, %ecx
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divw %cx
|
||||
; X64-NEXT: # kill: def $ax killed $ax def $eax
|
||||
; X64-NEXT: movzwl %ax, %ecx
|
||||
; X64-NEXT: cmpl $32767, %ecx # imm = 0x7FFF
|
||||
; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
|
||||
; X64-NEXT: cmovbl %eax, %ecx
|
||||
; X64-NEXT: addl %ecx, %ecx
|
||||
; X64-NEXT: movswl %cx, %eax
|
||||
; X64-NEXT: shrl %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: addl %eax, %eax
|
||||
; X86-NEXT: movzbl %cl, %ecx
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: divw %cx
|
||||
; X86-NEXT: # kill: def $ax killed $ax def $eax
|
||||
; X86-NEXT: movzwl %ax, %ecx
|
||||
; X86-NEXT: cmpl $32767, %ecx # imm = 0x7FFF
|
||||
; X86-NEXT: movl $32767, %ecx # imm = 0x7FFF
|
||||
; X86-NEXT: cmovbl %eax, %ecx
|
||||
; X86-NEXT: addl %ecx, %ecx
|
||||
; X86-NEXT: movswl %cx, %eax
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
%y2 = sext i8 %y to i15
|
||||
%y3 = shl i15 %y2, 7
|
||||
%tmp = call i15 @llvm.udiv.fix.sat.i15(i15 %x, i15 %y3, i32 4)
|
||||
%tmp2 = sext i15 %tmp to i16
|
||||
ret i16 %tmp2
|
||||
}
|
||||
|
||||
define i4 @func4(i4 %x, i4 %y) nounwind {
|
||||
; X64-LABEL: func4:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: andb $15, %sil
|
||||
; X64-NEXT: andb $15, %dil
|
||||
; X64-NEXT: shlb $2, %dil
|
||||
; X64-NEXT: movzbl %dil, %eax
|
||||
; X64-NEXT: divb %sil
|
||||
; X64-NEXT: movzbl %al, %ecx
|
||||
; X64-NEXT: cmpb $15, %cl
|
||||
; X64-NEXT: movl $15, %eax
|
||||
; X64-NEXT: cmovbl %ecx, %eax
|
||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func4:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NEXT: andb $15, %cl
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: andb $15, %al
|
||||
; X86-NEXT: shlb $2, %al
|
||||
; X86-NEXT: movzbl %al, %eax
|
||||
; X86-NEXT: divb %cl
|
||||
; X86-NEXT: movzbl %al, %ecx
|
||||
; X86-NEXT: cmpb $15, %al
|
||||
; X86-NEXT: movl $15, %eax
|
||||
; X86-NEXT: cmovbl %ecx, %eax
|
||||
; X86-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i4 @llvm.udiv.fix.sat.i4(i4 %x, i4 %y, i32 2)
|
||||
ret i4 %tmp
|
||||
}
|
||||
|
||||
define i64 @func5(i64 %x, i64 %y) nounwind {
|
||||
; X64-LABEL: func5:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rbx
|
||||
; X64-NEXT: movq %rsi, %rdx
|
||||
; X64-NEXT: leaq (%rdi,%rdi), %rsi
|
||||
; X64-NEXT: shrq $33, %rsi
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
|
||||
; X64-NEXT: orq %rax, %rsi
|
||||
; X64-NEXT: shlq $32, %rdi
|
||||
; X64-NEXT: xorl %ebx, %ebx
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: callq __udivti3
|
||||
; X64-NEXT: cmpq $-1, %rax
|
||||
; X64-NEXT: movq $-1, %rcx
|
||||
; X64-NEXT: cmovbq %rax, %rcx
|
||||
; X64-NEXT: cmpq $1, %rdx
|
||||
; X64-NEXT: movl $1, %esi
|
||||
; X64-NEXT: cmovbq %rdx, %rsi
|
||||
; X64-NEXT: sbbq %rbx, %rbx
|
||||
; X64-NEXT: notq %rbx
|
||||
; X64-NEXT: orq %rax, %rbx
|
||||
; X64-NEXT: cmpq $1, %rdx
|
||||
; X64-NEXT: cmoveq %rcx, %rbx
|
||||
; X64-NEXT: shrdq $1, %rsi, %rbx
|
||||
; X64-NEXT: movq %rbx, %rax
|
||||
; X64-NEXT: popq %rbx
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func5:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $24, %esp
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: shldl $31, %eax, %ecx
|
||||
; X86-NEXT: shll $31, %eax
|
||||
; X86-NEXT: movl %esp, %esi
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl 20(%ebp)
|
||||
; X86-NEXT: pushl 16(%ebp)
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl %edx
|
||||
; X86-NEXT: pushl %ecx
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: calll __udivti3
|
||||
; X86-NEXT: addl $32, %esp
|
||||
; X86-NEXT: movl (%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: cmpl $-1, %eax
|
||||
; X86-NEXT: movl $-1, %ecx
|
||||
; X86-NEXT: movl $-1, %esi
|
||||
; X86-NEXT: cmovbl %eax, %esi
|
||||
; X86-NEXT: cmpl $-1, %edx
|
||||
; X86-NEXT: cmovel %edx, %eax
|
||||
; X86-NEXT: cmovel %esi, %eax
|
||||
; X86-NEXT: cmovael %ecx, %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: cmovnel %ecx, %edx
|
||||
; X86-NEXT: cmovnel %ecx, %eax
|
||||
; X86-NEXT: leal -4(%ebp), %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i64 @llvm.udiv.fix.sat.i64(i64 %x, i64 %y, i32 31)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
define i18 @func6(i16 %x, i16 %y) nounwind {
|
||||
; X64-LABEL: func6:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movswl %di, %eax
|
||||
; X64-NEXT: andl $262143, %eax # imm = 0x3FFFF
|
||||
; X64-NEXT: movswl %si, %ecx
|
||||
; X64-NEXT: andl $262143, %ecx # imm = 0x3FFFF
|
||||
; X64-NEXT: shll $7, %eax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divl %ecx
|
||||
; X64-NEXT: cmpl $262143, %eax # imm = 0x3FFFF
|
||||
; X64-NEXT: movl $262143, %ecx # imm = 0x3FFFF
|
||||
; X64-NEXT: cmovael %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func6:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: andl $262143, %ecx # imm = 0x3FFFF
|
||||
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: andl $262143, %eax # imm = 0x3FFFF
|
||||
; X86-NEXT: shll $7, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: divl %ecx
|
||||
; X86-NEXT: cmpl $262143, %eax # imm = 0x3FFFF
|
||||
; X86-NEXT: movl $262143, %ecx # imm = 0x3FFFF
|
||||
; X86-NEXT: cmovael %ecx, %eax
|
||||
; X86-NEXT: retl
|
||||
%x2 = sext i16 %x to i18
|
||||
%y2 = sext i16 %y to i18
|
||||
%tmp = call i18 @llvm.udiv.fix.sat.i18(i18 %x2, i18 %y2, i32 7)
|
||||
ret i18 %tmp
|
||||
}
|
||||
|
||||
define i16 @func7(i16 %x, i16 %y) nounwind {
|
||||
; X64-LABEL: func7:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movzwl %si, %ecx
|
||||
; X64-NEXT: movzwl %di, %eax
|
||||
; X64-NEXT: addl %eax, %eax
|
||||
; X64-NEXT: shlq $16, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: cmpq $131071, %rax # imm = 0x1FFFF
|
||||
; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
|
||||
; X64-NEXT: cmovaeq %rcx, %rax
|
||||
; X64-NEXT: shrl %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func7:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movzwl %cx, %ecx
|
||||
; X86-NEXT: addl %ecx, %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: shrl $16, %edx
|
||||
; X86-NEXT: shll $16, %ecx
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl %edx
|
||||
; X86-NEXT: pushl %ecx
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
|
||||
; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
|
||||
; X86-NEXT: cmovael %ecx, %eax
|
||||
; X86-NEXT: testl %edx, %edx
|
||||
; X86-NEXT: cmovnel %ecx, %eax
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i16 @llvm.udiv.fix.sat.i16(i16 %x, i16 %y, i32 16)
|
||||
ret i16 %tmp
|
||||
}
|
||||
|
||||
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-LABEL: vec:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pxor %xmm8, %xmm8
|
||||
; X64-NEXT: movdqa %xmm1, %xmm2
|
||||
; X64-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm8[2],xmm2[3],xmm8[3]
|
||||
; X64-NEXT: movq %xmm2, %rcx
|
||||
; X64-NEXT: movdqa %xmm0, %xmm4
|
||||
; X64-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm8[2],xmm4[3],xmm8[3]
|
||||
; X64-NEXT: paddq %xmm4, %xmm4
|
||||
; X64-NEXT: psllq $31, %xmm4
|
||||
; X64-NEXT: movq %xmm4, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm7
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; X64-NEXT: movq %xmm2, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
|
||||
; X64-NEXT: movq %xmm2, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm2
|
||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm2[0]
|
||||
; X64-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
|
||||
; X64-NEXT: movdqa %xmm7, %xmm2
|
||||
; X64-NEXT: pxor %xmm4, %xmm2
|
||||
; X64-NEXT: movdqa {{.*#+}} xmm9 = [9223372043297226751,9223372043297226751]
|
||||
; X64-NEXT: movdqa %xmm9, %xmm6
|
||||
; X64-NEXT: pcmpgtd %xmm2, %xmm6
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
|
||||
; X64-NEXT: pcmpeqd %xmm9, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
|
||||
; X64-NEXT: pand %xmm3, %xmm5
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
|
||||
; X64-NEXT: por %xmm5, %xmm2
|
||||
; X64-NEXT: movdqa {{.*#+}} xmm6 = [8589934591,8589934591]
|
||||
; X64-NEXT: pand %xmm2, %xmm7
|
||||
; X64-NEXT: pandn %xmm6, %xmm2
|
||||
; X64-NEXT: por %xmm7, %xmm2
|
||||
; X64-NEXT: psrlq $1, %xmm2
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1]
|
||||
; X64-NEXT: movq %xmm1, %rcx
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
|
||||
; X64-NEXT: paddq %xmm0, %xmm0
|
||||
; X64-NEXT: psllq $31, %xmm0
|
||||
; X64-NEXT: movq %xmm0, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm3
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: movq %xmm1, %rcx
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: movq %xmm0, %rax
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: divq %rcx
|
||||
; X64-NEXT: movq %rax, %xmm0
|
||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
|
||||
; X64-NEXT: pxor %xmm3, %xmm4
|
||||
; X64-NEXT: movdqa %xmm9, %xmm0
|
||||
; X64-NEXT: pcmpgtd %xmm4, %xmm0
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
|
||||
; X64-NEXT: pcmpeqd %xmm9, %xmm4
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
|
||||
; X64-NEXT: pand %xmm1, %xmm4
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: por %xmm4, %xmm0
|
||||
; X64-NEXT: pand %xmm0, %xmm3
|
||||
; X64-NEXT: pandn %xmm6, %xmm0
|
||||
; X64-NEXT: por %xmm3, %xmm0
|
||||
; X64-NEXT: psrlq $1, %xmm0
|
||||
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: vec:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: addl %ecx, %ecx
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: shldl $31, %ecx, %eax
|
||||
; X86-NEXT: shll $31, %ecx
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl %ecx
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: cmpl $-1, %eax
|
||||
; X86-NEXT: movl $-1, %ecx
|
||||
; X86-NEXT: cmovbl %eax, %ecx
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl $0, %edi
|
||||
; X86-NEXT: sbbl %edi, %edi
|
||||
; X86-NEXT: notl %edi
|
||||
; X86-NEXT: orl %eax, %edi
|
||||
; X86-NEXT: movl %edi, %ebx
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: addl %esi, %esi
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: cmovel %ecx, %ebx
|
||||
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl $1, %ecx
|
||||
; X86-NEXT: cmovael %ecx, %edx
|
||||
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: shldl $31, %esi, %eax
|
||||
; X86-NEXT: shll $31, %esi
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: cmpl $-1, %eax
|
||||
; X86-NEXT: movl $-1, %ecx
|
||||
; X86-NEXT: cmovbl %eax, %ecx
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl $1, %esi
|
||||
; X86-NEXT: cmovbl %edx, %esi
|
||||
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl $0, %esi
|
||||
; X86-NEXT: sbbl %esi, %esi
|
||||
; X86-NEXT: notl %esi
|
||||
; X86-NEXT: orl %eax, %esi
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: addl %edi, %edi
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: cmovel %ecx, %esi
|
||||
; X86-NEXT: shldl $31, %edi, %eax
|
||||
; X86-NEXT: shll $31, %edi
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: cmpl $-1, %eax
|
||||
; X86-NEXT: movl $-1, %ebx
|
||||
; X86-NEXT: cmovbl %eax, %ebx
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl $0, %edi
|
||||
; X86-NEXT: sbbl %edi, %edi
|
||||
; X86-NEXT: notl %edi
|
||||
; X86-NEXT: orl %eax, %edi
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: addl %ebp, %ebp
|
||||
; X86-NEXT: setb %cl
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl %edx, %eax
|
||||
; X86-NEXT: movl $1, %edx
|
||||
; X86-NEXT: cmovael %edx, %eax
|
||||
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
|
||||
; X86-NEXT: cmovel %ebx, %edi
|
||||
; X86-NEXT: shldl $31, %ebp, %ecx
|
||||
; X86-NEXT: shll $31, %ebp
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl %ecx
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: cmpl $-1, %eax
|
||||
; X86-NEXT: movl $-1, %ecx
|
||||
; X86-NEXT: cmovbl %eax, %ecx
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: movl $1, %ebx
|
||||
; X86-NEXT: cmovbl %edx, %ebx
|
||||
; X86-NEXT: movl $0, %ebp
|
||||
; X86-NEXT: sbbl %ebp, %ebp
|
||||
; X86-NEXT: notl %ebp
|
||||
; X86-NEXT: orl %eax, %ebp
|
||||
; X86-NEXT: cmpl $1, %edx
|
||||
; X86-NEXT: cmovel %ecx, %ebp
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $1, %eax, %ecx
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $1, %eax, %esi
|
||||
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $1, %eax, %edi
|
||||
; X86-NEXT: shrdl $1, %ebx, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %ebp, 12(%eax)
|
||||
; X86-NEXT: movl %edi, 8(%eax)
|
||||
; X86-NEXT: movl %esi, 4(%eax)
|
||||
; X86-NEXT: movl %ecx, (%eax)
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl $4
|
||||
%tmp = call <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 31)
|
||||
ret <4 x i32> %tmp
|
||||
}
|
Loading…
Reference in New Issue
Block a user