mirror of https://github.com/RPCS3/llvm-mirror.git
[PowerPC] Require nsz flag for c-a*b to FNMSUB
On PowerPC, FNMSUB (both the VSX and non-VSX versions) computes -(a*b-c), but the backend used to generate these instructions regardless of whether the nsz flag was present. If a*b-c == 0, the transformation changes the sign of zero. This patch requires nsz for the fold and introduces a PPC-specific FNMSUB ISD opcode, which also helps improve combined FMA code sequences.

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D76585
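As a minimal illustration (a sketch, not part of the patch; the function name is hypothetical), the fold in question turns c - a*b into a single fnmsub. Since fnmsub computes -(a*b - c), the two forms differ in the sign of zero when a*b == c, so the IR must carry nsz:

; With a = b = c = 1.0: c - a*b = +0.0, but -(a*b - c) = -(+0.0) = -0.0,
; so the fold is only sound under the nsz fast-math flag.
define double @fold_candidate(double %a, double %b, double %c) {
  %mul = fmul nsz double %a, %b
  %sub = fsub nsz double %c, %mul ; eligible to become fnmsub
  ret double %sub
}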
commit 1223486e31 (parent e24a4fa7a0)
@@ -1228,6 +1228,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::MUL);
+  setTargetDAGCombine(ISD::FMA);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   if (Subtarget.hasFPCVT())
@@ -1532,6 +1533,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
   case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
+  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
   }
   return nullptr;
 }
@@ -14115,6 +14117,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     return combineSRL(N, DCI);
   case ISD::MUL:
     return combineMUL(N, DCI);
+  case ISD::FMA:
+  case PPCISD::FNMSUB:
+    return combineFMALike(N, DCI);
   case PPCISD::SHL:
     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
       return N->getOperand(0);
@@ -15779,6 +15784,85 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
   return PPC::createFastISel(FuncInfo, LibInfo);
 }
 
+// 'Inverted' means the FMA opcode after negating one multiplicand.
+// For example, (fma -a b c) = (fnmsub a b c)
+static unsigned invertFMAOpcode(unsigned Opc) {
+  switch (Opc) {
+  default:
+    llvm_unreachable("Invalid FMA opcode for PowerPC!");
+  case ISD::FMA:
+    return PPCISD::FNMSUB;
+  case PPCISD::FNMSUB:
+    return ISD::FMA;
+  }
+}
+
+SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                                bool LegalOps, bool OptForSize,
+                                                NegatibleCost &Cost,
+                                                unsigned Depth) const {
+  if (Depth > SelectionDAG::MaxRecursionDepth)
+    return SDValue();
+
+  unsigned Opc = Op.getOpcode();
+  EVT VT = Op.getValueType();
+  SDNodeFlags Flags = Op.getNode()->getFlags();
+
+  switch (Opc) {
+  case PPCISD::FNMSUB:
+    // TODO: QPX subtarget is deprecated. No transformation here.
+    if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
+      break;
+
+    const TargetOptions &Options = getTargetMachine().Options;
+    SDValue N0 = Op.getOperand(0);
+    SDValue N1 = Op.getOperand(1);
+    SDValue N2 = Op.getOperand(2);
+    SDLoc Loc(Op);
+
+    NegatibleCost N2Cost = NegatibleCost::Expensive;
+    SDValue NegN2 =
+        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
+
+    if (!NegN2)
+      return SDValue();
+
+    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
+    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
+    // These transformations may change sign of zeroes. For example,
+    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
+    if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
+      // Try and choose the cheaper one to negate.
+      NegatibleCost N0Cost = NegatibleCost::Expensive;
+      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
+                                           N0Cost, Depth + 1);
+
+      NegatibleCost N1Cost = NegatibleCost::Expensive;
+      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
+                                           N1Cost, Depth + 1);
+
+      if (NegN0 && N0Cost <= N1Cost) {
+        Cost = std::min(N0Cost, N2Cost);
+        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
+      } else if (NegN1) {
+        Cost = std::min(N1Cost, N2Cost);
+        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
+      }
+    }
+
+    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
+    if (isOperationLegal(ISD::FMA, VT)) {
+      Cost = N2Cost;
+      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
+    }
+
+    break;
+  }
+
+  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
+                                              Cost, Depth);
+}
+
 // Override to enable LOAD_STACK_GUARD lowering on Linux.
 bool PPCTargetLowering::useLoadStackGuardNode() const {
   if (!Subtarget.isTargetLinux())
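To spell out the sign-of-zero caveat in the comments above (a worked example, not text from the patch): with $a = b = c = 1$ under IEEE 754,

$$-\bigl(-(ab - c)\bigr) = -(-(+0)) = +0, \qquad -\bigl((-a)b - (-c)\bigr) = -(-1 + 1) = -0,$$

so pushing a negation through an fnmsub by negating a multiplicand and the addend is only sound when signed zeros may be ignored.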
@@ -16185,6 +16269,45 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
   }
 }
 
+// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
+// in combiner since we need to check SD flags and other subtarget features.
+SDValue PPCTargetLowering::combineFMALike(SDNode *N,
+                                          DAGCombinerInfo &DCI) const {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  SDNodeFlags Flags = N->getFlags();
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetOptions &Options = getTargetMachine().Options;
+  unsigned Opc = N->getOpcode();
+  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+  bool LegalOps = !DCI.isBeforeLegalizeOps();
+  SDLoc Loc(N);
+
+  // TODO: QPX subtarget is deprecated. No transformation here.
+  if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT) ||
+      (VT.isVector() && !Subtarget.hasVSX()))
+    return SDValue();
+
+  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
+  // since (fnmsub a b c)=-0 while c-ab=+0.
+  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
+    return SDValue();
+
+  // (fma (fneg a) b c) => (fnmsub a b c)
+  // (fnmsub (fneg a) b c) => (fma a b c)
+  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
+    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
+
+  // (fma a (fneg b) c) => (fnmsub a b c)
+  // (fnmsub a (fneg b) c) => (fma a b c)
+  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
+    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
+
+  return SDValue();
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
   if (!Subtarget.is64BitELFABI())
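A rough IR-level sketch of the fold combineFMALike enables (an assumed example mirroring the fma-negate tests further below): with nsz, an fma with a negated multiplicand collapses to a single fnmsub, and the inverse fold applies to PPCISD::FNMSUB nodes:

define double @neg_fma(double %a, double %b, double %c) {
  %neg = fsub nsz double -0.0, %a
  ; fma(-a, b, c) = c - a*b = -(a*b - c), i.e. one fnmsub
  %r = call nsz double @llvm.fma.f64(double %neg, double %b, double %c)
  ret double %r
}
declare double @llvm.fma.f64(double, double, double)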
@@ -148,6 +148,9 @@ namespace llvm {
       SRA,
       SHL,
 
+      /// FNMSUB - Negated multiply-subtract instruction.
+      FNMSUB,
+
       /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
       /// word and shift left immediate.
       EXTSWSLI,
@@ -674,6 +677,10 @@ namespace llvm {
       return VT.isScalarInteger();
     }
 
+    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
+                                 bool OptForSize, NegatibleCost &Cost,
+                                 unsigned Depth = 0) const override;
+
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
@@ -1202,6 +1209,7 @@ namespace llvm {
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -216,6 +216,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
 def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
 def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
 
+def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>;
+
 def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
 
 // Move 2 i64 values into a VSX register
@@ -3381,15 +3383,19 @@ def : Pat<(atomic_fence (timm), (timm)), (SYNC 1)>, Requires<[HasSYNC]>;
 def : Pat<(atomic_fence (timm), (timm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
 
 let Predicates = [HasFPU] in {
-// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
-          (FNMSUB $A, $C, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
-          (FNMSUB $A, $C, $B)>;
-def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
-          (FNMSUBS $A, $C, $B)>;
-def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
-          (FNMSUBS $A, $C, $B)>;
+// Additional fnmsub patterns for custom node
+def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
+          (FNMSUB $A, $B, $C)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
+          (FNMSUBS $A, $B, $C)>;
+def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
+          (FMSUB $A, $B, $C)>;
+def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
+          (FMSUBS $A, $B, $C)>;
+def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
+          (FNMADD $A, $B, $C)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
+          (FNMADDS $A, $B, $C)>;
 
 // FCOPYSIGN's operand types need not agree.
 def : Pat<(fcopysign f64:$frB, f32:$frA),
@@ -2416,21 +2416,27 @@ def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
                      (and v4i32:$B, v4i32:$C))),
           (v4i32 (XXSEL $A, $B, $C))>;
 
-// Additional fnmsub patterns: -a*b + c == -(a*b - c)
-def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
-          (XSNMSUBADP $C, $A, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
+// Additional fnmsub pattern for PPC specific ISD opcode
+def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
           (XSNMSUBADP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
+          (XSMSUBADP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
+          (XSNMADDADP $C, $A, $B)>;
 
-def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
-          (XVNMSUBADP $C, $A, $B)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
+def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C),
           (XVNMSUBADP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C)),
+          (XVMSUBADP $C, $A, $B)>;
+def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, (fneg v2f64:$C)),
+          (XVNMADDADP $C, $A, $B)>;
 
-def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
-          (XVNMSUBASP $C, $A, $B)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
           (XVNMSUBASP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)),
+          (XVMSUBASP $C, $A, $B)>;
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
+          (XVNMADDASP $C, $A, $B)>;
 
 def : Pat<(v2f64 (bitconvert v4f32:$A)),
           (COPY_TO_REGCLASS $A, VSRC)>;
@@ -3003,11 +3009,13 @@ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
           (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
-// Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
-def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
-          (XSNMSUBASP $C, $A, $B)>;
-def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
+// Additional fnmsub pattern for PPC specific ISD opcode
+def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
           (XSNMSUBASP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
+          (XSMSUBASP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
+          (XSNMADDASP $C, $A, $B)>;
 
 // f32 neg
 // Although XSNEGDP is available in P7, we want to select it starting from P8,
@@ -3516,9 +3524,13 @@ def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
 
 // Any Power9 VSX subtarget.
 let Predicates = [HasVSX, HasP9Vector] in {
-// Additional fnmsub patterns: -a*b + c == -(a*b - c)
-def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
-def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
+// Additional fnmsub pattern for PPC specific ISD opcode
+def : Pat<(PPCfnmsub f128:$A, f128:$B, f128:$C),
+          (XSNMSUBQP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)),
+          (XSMSUBQP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)),
+          (XSNMADDQP $C, $A, $B)>;
 
 def : Pat<(f128 (sint_to_fp i64:$src)),
           (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
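For reference, the new patterns encode standard FMA algebra (stated here as a reminder, with fnmsub(a, b, c) = -(ab - c)):

$$-\,\mathrm{fnmsub}(a,b,c) = ab - c = \mathrm{fmsub}(a,b,c), \qquad \mathrm{fnmsub}(a,b,-c) = -(ab + c) = \mathrm{fnmadd}(a,b,c).$$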
@@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 ; CHECK-NEXT:    xvredp 2, 0
 ; CHECK-NEXT:    xxswapd 1, 1
 ; CHECK-NEXT:    xxlor 3, 1, 1
-; CHECK-NEXT:    xvmaddadp 3, 0, 2
-; CHECK-NEXT:    xvnmsubadp 2, 2, 3
-; CHECK-NEXT:    xvmaddadp 1, 0, 2
-; CHECK-NEXT:    xvmsubadp 2, 2, 1
+; CHECK-NEXT:    xvnmsubadp 3, 0, 2
+; CHECK-NEXT:    xvmaddadp 2, 2, 3
+; CHECK-NEXT:    xvnmsubadp 1, 0, 2
+; CHECK-NEXT:    xvnmaddadp 2, 2, 1
 ; CHECK-NEXT:    xvmuldp 34, 34, 2
 ; CHECK-NEXT:    xvmuldp 35, 35, 2
 ; CHECK-NEXT:    blr
@@ -121,7 +121,7 @@ entry:
   %1 = load fp128, fp128* %b, align 16
   %2 = load fp128, fp128* %c, align 16
   %mul = fmul contract fp128 %1, %2
-  %sub = fsub contract fp128 %0, %mul
+  %sub = fsub contract nsz fp128 %0, %mul
   store fp128 %sub, fp128* %res, align 16
   ret void
 ; CHECK-LABEL: qpFmsub
@@ -10,6 +10,6 @@ define dso_local float @foo(float %0, float %1) local_unnamed_addr {
 ; CHECK-NEXT:    xsmaddasp 0, 3, 1
 ; CHECK-NEXT:    fmr 1, 0
 ; CHECK-NEXT:    blr
-  %3 = fdiv reassoc arcp float %0, %1
+  %3 = fdiv reassoc arcp nsz float %0, %1
   ret float %3
 }
@@ -225,14 +225,18 @@ define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
 define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
 ; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fnmsub 0, 1, 2, 5
-; CHECK-NEXT:    fnmsub 1, 3, 4, 0
+; CHECK-NEXT:    fneg 0, 1
+; CHECK-NEXT:    fmadd 0, 0, 2, 5
+; CHECK-NEXT:    fneg 1, 3
+; CHECK-NEXT:    fmadd 1, 1, 4, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsnmsubmdp 1, 2, 5
-; CHECK-VSX-NEXT:    xsnmsubadp 1, 3, 4
+; CHECK-VSX-NEXT:    xsnegdp 1, 1
+; CHECK-VSX-NEXT:    xsnegdp 0, 3
+; CHECK-VSX-NEXT:    xsmaddmdp 1, 2, 5
+; CHECK-VSX-NEXT:    xsmaddadp 1, 0, 4
 ; CHECK-VSX-NEXT:    blr
   double %D, double %E) {
   %F = fmul float %A, %B ; <float> [#uses=1]
@@ -246,15 +250,19 @@ define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
 define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
 ; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fnmsub 0, 3, 4, 5
-; CHECK-NEXT:    fnmsub 1, 1, 2, 0
+; CHECK-NEXT:    fneg 0, 3
+; CHECK-NEXT:    fmadd 0, 0, 4, 5
+; CHECK-NEXT:    fneg 1, 1
+; CHECK-NEXT:    fmadd 1, 1, 2, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsnmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT:    xsnmsubadp 3, 1, 2
-; CHECK-VSX-NEXT:    fmr 1, 3
+; CHECK-VSX-NEXT:    xsnegdp 0, 3
+; CHECK-VSX-NEXT:    xsnegdp 1, 1
+; CHECK-VSX-NEXT:    xsmaddmdp 0, 4, 5
+; CHECK-VSX-NEXT:    xsmaddadp 0, 1, 2
+; CHECK-VSX-NEXT:    fmr 1, 0
 ; CHECK-VSX-NEXT:    blr
   float %D, double %E) {
   %F = fmul float %A, %B ; <float> [#uses=1]
@@ -503,14 +511,18 @@ define double @test_reassoc_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
 define double @test_reassoc_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
 ; CHECK-LABEL: test_reassoc_FMSUB_ASSOC_EXT3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fnmsub 0, 1, 2, 5
-; CHECK-NEXT:    fnmsub 1, 3, 4, 0
+; CHECK-NEXT:    fneg 0, 1
+; CHECK-NEXT:    fmadd 0, 0, 2, 5
+; CHECK-NEXT:    fneg 1, 3
+; CHECK-NEXT:    fmadd 1, 1, 4, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_reassoc_FMSUB_ASSOC_EXT3:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsnmsubmdp 1, 2, 5
-; CHECK-VSX-NEXT:    xsnmsubadp 1, 3, 4
+; CHECK-VSX-NEXT:    xsnegdp 1, 1
+; CHECK-VSX-NEXT:    xsnegdp 0, 3
+; CHECK-VSX-NEXT:    xsmaddmdp 1, 2, 5
+; CHECK-VSX-NEXT:    xsmaddadp 1, 0, 4
 ; CHECK-VSX-NEXT:    blr
   double %D, double %E) {
   %F = fmul reassoc float %A, %B ; <float> [#uses=1]
@@ -521,18 +533,45 @@ define double @test_reassoc_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
   ret double %J
 }
 
+; fnmsub/xsnmsubadp may affect the sign of zero, we need nsz flag
+; to ensure generating them
+define double @test_fast_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
+; CHECK-LABEL: test_fast_FMSUB_ASSOC_EXT3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fnmsub 0, 1, 2, 5
+; CHECK-NEXT:    fnmsub 1, 3, 4, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: test_fast_FMSUB_ASSOC_EXT3:
+; CHECK-VSX:       # %bb.0:
+; CHECK-VSX-NEXT:    xsnmsubmdp 1, 2, 5
+; CHECK-VSX-NEXT:    xsnmsubadp 1, 3, 4
+; CHECK-VSX-NEXT:    blr
+  double %D, double %E) {
+  %F = fmul reassoc float %A, %B
+  %G = fpext float %F to double
+  %H = fmul reassoc double %C, %D
+  %I = fadd reassoc nsz double %H, %G
+  %J = fsub reassoc nsz double %E, %I
+  ret double %J
+}
+
 define double @test_reassoc_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
 ; CHECK-LABEL: test_reassoc_FMSUB_ASSOC_EXT4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fnmsub 0, 3, 4, 5
-; CHECK-NEXT:    fnmsub 1, 1, 2, 0
+; CHECK-NEXT:    fneg 0, 3
+; CHECK-NEXT:    fmadd 0, 0, 4, 5
+; CHECK-NEXT:    fneg 1, 1
+; CHECK-NEXT:    fmadd 1, 1, 2, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_reassoc_FMSUB_ASSOC_EXT4:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsnmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT:    xsnmsubadp 3, 1, 2
-; CHECK-VSX-NEXT:    fmr 1, 3
+; CHECK-VSX-NEXT:    xsnegdp 0, 3
+; CHECK-VSX-NEXT:    xsnegdp 1, 1
+; CHECK-VSX-NEXT:    xsmaddmdp 0, 4, 5
+; CHECK-VSX-NEXT:    xsmaddadp 0, 1, 2
+; CHECK-VSX-NEXT:    fmr 1, 0
+; CHECK-VSX-NEXT:    blr
   float %D, double %E) {
   %F = fmul reassoc float %A, %B ; <float> [#uses=1]
@@ -542,3 +581,25 @@ define double @test_reassoc_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
   %J = fsub reassoc double %E, %I ; <double> [#uses=1]
   ret double %J
 }
+
+define double @test_fast_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
+; CHECK-LABEL: test_fast_FMSUB_ASSOC_EXT4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fnmsub 0, 3, 4, 5
+; CHECK-NEXT:    fnmsub 1, 1, 2, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: test_fast_FMSUB_ASSOC_EXT4:
+; CHECK-VSX:       # %bb.0:
+; CHECK-VSX-NEXT:    xsnmsubmdp 3, 4, 5
+; CHECK-VSX-NEXT:    xsnmsubadp 3, 1, 2
+; CHECK-VSX-NEXT:    fmr 1, 3
+; CHECK-VSX-NEXT:    blr
+  float %D, double %E) {
+  %F = fmul reassoc float %A, %B
+  %G = fmul reassoc float %C, %D
+  %H = fadd reassoc nsz float %F, %G
+  %I = fpext float %H to double
+  %J = fsub reassoc nsz double %E, %I
+  ret double %J
+}
@@ -176,9 +176,11 @@ define float @fma_combine_no_ice() {
 ; CHECK-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
 ; CHECK-NEXT:    lfs 3, .LCPI4_1@toc@l(4)
 ; CHECK-NEXT:    lfs 1, .LCPI4_2@toc@l(3)
+; CHECK-NEXT:    fmr 4, 3
 ; CHECK-NEXT:    xsmaddasp 3, 2, 0
+; CHECK-NEXT:    xsnmaddasp 4, 2, 0
 ; CHECK-NEXT:    xsmaddasp 1, 2, 3
-; CHECK-NEXT:    xsnmsubasp 1, 3, 2
+; CHECK-NEXT:    xsmaddasp 1, 4, 2
 ; CHECK-NEXT:    blr
   %tmp = load float, float* undef, align 4
   %tmp2 = load float, float* undef, align 4
@@ -49,10 +49,27 @@ define double @test_FMSUB_EXT2(float %A, float %B, double %C) {
   %F = fsub double %C, %E ; <double> [#uses=1]
   ret double %F
 ; CHECK-LABEL: test_FMSUB_EXT2:
-; CHECK: fnmsub
+; CHECK: fneg
+; CHECK-NEXT: fmadd
 ; CHECK-NEXT: blr
 
 ; CHECK-VSX-LABEL: test_FMSUB_EXT2:
+; CHECK-VSX: xsnegdp
+; CHECK-VSX-NEXT: xsmaddmdp
 ; CHECK-VSX-NEXT: blr
 }
+
+; need nsz flag to generate fnmsub since it may affect sign of zero
+define double @test_FMSUB_EXT2_NSZ(float %A, float %B, double %C) {
+  %D = fmul nsz float %A, %B ; <float> [#uses=1]
+  %E = fpext float %D to double ; <double> [#uses=1]
+  %F = fsub nsz double %C, %E ; <double> [#uses=1]
+  ret double %F
+; CHECK-LABEL: test_FMSUB_EXT2_NSZ:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_EXT2_NSZ:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: blr
+}
@@ -7,12 +7,14 @@
 define double @test_mul_sub_f64(double %a, double %b, double %c) {
 ; VSX-LABEL: test_mul_sub_f64:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xsnmsubadp 1, 2, 3
+; VSX-NEXT:    xsnegdp 0, 2
+; VSX-NEXT:    xsmaddadp 1, 0, 3
 ; VSX-NEXT:    blr
 ;
 ; NO-VSX-LABEL: test_mul_sub_f64:
 ; NO-VSX:       # %bb.0: # %entry
-; NO-VSX-NEXT:    fnmsub 1, 2, 3, 1
+; NO-VSX-NEXT:    fneg 0, 2
+; NO-VSX-NEXT:    fmadd 1, 0, 3, 1
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fmul contract reassoc double %b, %c
@@ -43,13 +45,15 @@ entry:
 define double @test_neg_fma_f64(double %a, double %b, double %c) {
 ; VSX-LABEL: test_neg_fma_f64:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xsnmsubadp 3, 1, 2
+; VSX-NEXT:    xsnegdp 0, 1
+; VSX-NEXT:    xsmaddadp 3, 0, 2
 ; VSX-NEXT:    fmr 1, 3
 ; VSX-NEXT:    blr
 ;
 ; NO-VSX-LABEL: test_neg_fma_f64:
 ; NO-VSX:       # %bb.0: # %entry
-; NO-VSX-NEXT:    fnmsub 1, 1, 2, 3
+; NO-VSX-NEXT:    fneg 0, 1
+; NO-VSX-NEXT:    fmadd 1, 0, 2, 3
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fsub contract reassoc double -0.0, %a
@@ -61,12 +65,14 @@ entry:
 define float @test_mul_sub_f32(float %a, float %b, float %c) {
 ; VSX-LABEL: test_mul_sub_f32:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xsnmsubasp 1, 2, 3
+; VSX-NEXT:    xsnegdp 0, 2
+; VSX-NEXT:    xsmaddasp 1, 0, 3
 ; VSX-NEXT:    blr
 ;
 ; NO-VSX-LABEL: test_mul_sub_f32:
 ; NO-VSX:       # %bb.0: # %entry
-; NO-VSX-NEXT:    fnmsubs 1, 2, 3, 1
+; NO-VSX-NEXT:    fneg 0, 2
+; NO-VSX-NEXT:    fmadds 1, 0, 3, 1
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fmul contract reassoc float %b, %c
@@ -97,13 +103,15 @@ entry:
 define float @test_neg_fma_f32(float %a, float %b, float %c) {
 ; VSX-LABEL: test_neg_fma_f32:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xsnmsubasp 3, 1, 2
+; VSX-NEXT:    xsnegdp 0, 1
+; VSX-NEXT:    xsmaddasp 3, 0, 2
 ; VSX-NEXT:    fmr 1, 3
 ; VSX-NEXT:    blr
 ;
 ; NO-VSX-LABEL: test_neg_fma_f32:
 ; NO-VSX:       # %bb.0: # %entry
-; NO-VSX-NEXT:    fnmsubs 1, 1, 2, 3
+; NO-VSX-NEXT:    fneg 0, 1
+; NO-VSX-NEXT:    fmadds 1, 0, 2, 3
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fsub contract reassoc float -0.0, %a
@@ -114,14 +122,17 @@ entry:
 define <2 x double> @test_neg_fma_v2f64(<2 x double> %a, <2 x double> %b,
 ; VSX-LABEL: test_neg_fma_v2f64:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xvnmsubadp 36, 34, 35
+; VSX-NEXT:    xvnegdp 0, 34
+; VSX-NEXT:    xvmaddadp 36, 0, 35
 ; VSX-NEXT:    vmr 2, 4
 ; VSX-NEXT:    blr
 ;
 ; NO-VSX-LABEL: test_neg_fma_v2f64:
 ; NO-VSX:       # %bb.0: # %entry
-; NO-VSX-NEXT:    fnmsub 1, 1, 3, 5
-; NO-VSX-NEXT:    fnmsub 2, 2, 4, 6
+; NO-VSX-NEXT:    fneg 0, 2
+; NO-VSX-NEXT:    fneg 1, 1
+; NO-VSX-NEXT:    fmadd 1, 1, 3, 5
+; NO-VSX-NEXT:    fmadd 2, 0, 4, 6
 ; NO-VSX-NEXT:    blr
   <2 x double> %c) {
 entry:
@@ -135,7 +146,8 @@ entry:
 define <4 x float> @test_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
 ; VSX-LABEL: test_neg_fma_v4f32:
 ; VSX:       # %bb.0: # %entry
-; VSX-NEXT:    xvnmsubasp 36, 34, 35
+; VSX-NEXT:    xvnegsp 0, 34
+; VSX-NEXT:    xvmaddasp 36, 0, 35
 ; VSX-NEXT:    vmr 2, 4
 ; VSX-NEXT:    blr
 ;
@@ -167,8 +179,8 @@ define double @test_fast_mul_sub_f64(double %a, double %b, double %c) {
 ; NO-VSX-NEXT:    fnmsub 1, 2, 3, 1
 ; NO-VSX-NEXT:    blr
 entry:
-  %0 = fmul reassoc double %b, %c
-  %1 = fsub reassoc double %a, %0
+  %0 = fmul reassoc nsz double %b, %c
+  %1 = fsub reassoc nsz double %a, %0
   ret double %1
 }
 
@@ -206,7 +218,7 @@ define double @test_fast_neg_fma_f64(double %a, double %b, double %c) {
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fsub reassoc double -0.0, %a
-  %1 = call reassoc double @llvm.fma.f64(double %0, double %b, double %c)
+  %1 = call reassoc nsz double @llvm.fma.f64(double %0, double %b, double %c)
   ret double %1
 }
 
@@ -222,7 +234,7 @@ define float @test_fast_mul_sub_f32(float %a, float %b, float %c) {
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fmul reassoc float %b, %c
-  %1 = fsub reassoc float %a, %0
+  %1 = fsub reassoc nsz float %a, %0
   ret float %1
 }
 
@@ -242,7 +254,7 @@ define float @test_fast_2mul_sub_f32(float %a, float %b, float %c, float %d) {
 entry:
   %0 = fmul reassoc float %a, %b
   %1 = fmul reassoc float %c, %d
-  %2 = fsub reassoc float %0, %1
+  %2 = fsub reassoc nsz float %0, %1
   ret float %2
 }
 
@@ -259,7 +271,7 @@ define float @test_fast_neg_fma_f32(float %a, float %b, float %c) {
 ; NO-VSX-NEXT:    blr
 entry:
   %0 = fsub reassoc float -0.0, %a
-  %1 = call reassoc float @llvm.fma.f32(float %0, float %b, float %c)
+  %1 = call reassoc nsz float @llvm.fma.f32(float %0, float %b, float %c)
   ret float %1
 }
 
@@ -278,7 +290,7 @@ define <2 x double> @test_fast_neg_fma_v2f64(<2 x double> %a, <2 x double> %b,
   <2 x double> %c) {
 entry:
   %0 = fsub reassoc <2 x double> <double -0.0, double -0.0>, %a
-  %1 = call reassoc <2 x double> @llvm.fma.v2f64(<2 x double> %0, <2 x double> %b,
+  %1 = call reassoc nsz <2 x double> @llvm.fma.v2f64(<2 x double> %0, <2 x double> %b,
                                                  <2 x double> %c)
   ret <2 x double> %1
 }
@@ -301,7 +313,7 @@ define <4 x float> @test_fast_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
 entry:
   %0 = fsub reassoc <4 x float> <float -0.0, float -0.0, float -0.0,
                                  float -0.0>, %a
-  %1 = call reassoc <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %b,
+  %1 = call reassoc nsz <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %b,
                                                 <4 x float> %c)
   ret <4 x float> %1
 }
@@ -13,7 +13,7 @@ define double @fsub1(double %a, double %b, double %c, double %d) {
 entry:
   %mul = fmul reassoc double %b, %a
   %mul1 = fmul reassoc double %d, %c
-  %sub = fsub reassoc double %mul, %mul1
+  %sub = fsub reassoc nsz double %mul, %mul1
   %mul3 = fmul reassoc double %mul, %sub
   ret double %mul3
 }
@@ -113,7 +113,7 @@ define double @fma_multi_uses1(double %a, double %b, double %c, double %d, doubl
   store double %ab, double* %p1 ; extra use of %ab
   store double %ab, double* %p2 ; another extra use of %ab
   store double %cd, double* %p3 ; extra use of %cd
-  %r = fsub reassoc double %ab, %cd
+  %r = fsub reassoc nsz double %ab, %cd
   ret double %r
 }
 
@@ -156,8 +156,8 @@ define double @fma_multi_uses3(double %a, double %b, double %c, double %d, doubl
   store double %ab, double* %p1 ; extra use of %ab
   store double %ab, double* %p2 ; another extra use of %ab
   store double %fg, double* %p3 ; extra use of %fg
-  %q = fsub reassoc double %fg, %cd ; The uses of %cd reduce to 1 after %r is folded. 2 uses of %fg, fold %cd, remove def of %cd
-  %r = fsub reassoc double %ab, %cd ; Fold %r before %q. 3 uses of %ab, 2 uses of %cd, fold %cd
+  %q = fsub reassoc nsz double %fg, %cd ; The uses of %cd reduce to 1 after %r is folded. 2 uses of %fg, fold %cd, remove def of %cd
+  %r = fsub reassoc nsz double %ab, %cd ; Fold %r before %q. 3 uses of %ab, 2 uses of %cd, fold %cd
   %add = fadd reassoc double %r, %q
   ret double %add
 }
@@ -95,10 +95,25 @@ define double @test_FNMSUB1(double %A, double %B, double %C) {
   %E = fsub double %C, %D ; <double> [#uses=1]
   ret double %E
 ; CHECK-LABEL: test_FNMSUB1:
-; CHECK: fnmsub
+; CHECK: fneg
+; CHECK-NEXT: fmadd
 ; CHECK-NEXT: blr
 
 ; CHECK-VSX-LABEL: test_FNMSUB1:
+; CHECK-VSX: xsnegdp
+; CHECK-VSX-NEXT: xsmaddmdp
 }
 
+; need nsz flag to generate fnmsub since it may affect sign of zero
+define double @test_FNMSUB1_NSZ(double %A, double %B, double %C) {
+  %D = fmul nsz double %A, %B ; <double> [#uses=1]
+  %E = fsub nsz double %C, %D ; <double> [#uses=1]
+  ret double %E
+; CHECK-LABEL: test_FNMSUB1_NSZ:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FNMSUB1_NSZ:
+; CHECK-VSX: xsnmsubmdp
+}
 
@@ -429,9 +429,9 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
 ; CHECK-P9-NEXT:    xsmaddasp 4, 1, 0
 ; CHECK-P9-NEXT:    xsmulsp 1, 3, 4
 ; CHECK-P9-NEXT:    blr
-  %x = call reassoc arcp float @llvm.sqrt.f32(float %a)
-  %y = fmul reassoc float %x, %b
-  %z = fdiv reassoc arcp float %c, %y
+  %x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a)
+  %y = fmul reassoc nsz float %x, %b
+  %z = fdiv reassoc arcp nsz float %c, %y
   ret float %z
 }
 
@@ -651,7 +651,7 @@ define float @goo2_fmf(float %a, float %b) nounwind {
 ; CHECK-P9-NEXT:    xsmaddasp 0, 3, 1
 ; CHECK-P9-NEXT:    fmr 1, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp float %a, %b
+  %r = fdiv reassoc arcp nsz float %a, %b
   ret float %r
 }
 
@@ -705,7 +705,7 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-P9-NEXT:    xvmaddasp 0, 1, 34
 ; CHECK-P9-NEXT:    xxlor 34, 0, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp <4 x float> %a, %b
+  %r = fdiv reassoc arcp nsz <4 x float> %a, %b
   ret <4 x float> %r
 }
 
@@ -13,9 +13,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
 ; CHECK-NEXT:    lvx 4, 0, 3
 ; CHECK-NEXT:    xxspltw 0, 0, 0
 ; CHECK-NEXT:    xvresp 1, 0
-; CHECK-NEXT:    xvmaddasp 35, 0, 1
+; CHECK-NEXT:    xvnmsubasp 35, 0, 1
 ; CHECK-NEXT:    xvmulsp 0, 34, 36
-; CHECK-NEXT:    xvnmsubasp 1, 1, 35
+; CHECK-NEXT:    xvmaddasp 1, 1, 35
 ; CHECK-NEXT:    xvmulsp 34, 0, 1
 ; CHECK-NEXT:    blr
 %ins = insertelement <4 x float> undef, float %a, i32 0