mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[Hexagon] Improvements to handling and generation of FP instructions
Improved handling of fma, floating point min/max, additional load/store instructions for floating point types.

Patch by Jyotsna Verma.

llvm-svn: 279239
This commit is contained in:
parent
12fdf6ca4a
commit
3cad3632a2
@@ -2040,6 +2040,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::FSUB, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMUL, MVT::f64, Expand);
|
||||
|
||||
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
|
||||
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
|
||||
@@ -2287,6 +2290,10 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
|
||||
return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
|
||||
}
|
||||
|
||||
bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
return isOperationLegalOrCustom(ISD::FMA, VT);
|
||||
}
|
||||
|
||||
// Should we expand the build vector with shuffles?
|
||||
bool
|
||||
HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
|
||||
|
@@ -115,6 +115,12 @@ bool isPositiveHalfWord(SDNode *N);
|
||||
|
||||
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
|
||||
|
||||
/// Return true if an FMA operation is faster than a pair of mul and add
|
||||
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
|
||||
/// method returns true (and FMAs are legal), otherwise fmuladd is
|
||||
/// expanded to mul + add.
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT) const override;
|
||||
|
||||
// Should we expand the build vector with shuffles?
|
||||
bool shouldExpandBuildVectorWithShuffles(EVT VT,
|
||||
unsigned DefinedValues) const override;
|
||||
|
@@ -97,8 +97,70 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
|
||||
let Inst{20-16} = Rss;
|
||||
}
|
||||
|
||||
defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
|
||||
defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
|
||||
let AddedComplexity = 20 in {
|
||||
defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
|
||||
defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 60 in {
|
||||
defm : T_LoadAbsReg_Pat <load, L4_loadri_ur, f32>;
|
||||
defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, f64>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 40 in {
|
||||
def: Loadxs_pat<load, f32, L4_loadri_rr>;
|
||||
def: Loadxs_pat<load, f64, L4_loadrd_rr>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
|
||||
def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 80 in {
|
||||
def: Loada_pat<load, f32, u32ImmPred, L4_loadri_abs>;
|
||||
def: Loada_pat<load, f32, addrga, L4_loadri_abs>;
|
||||
def: Loada_pat<load, f64, addrga, L4_loadrd_abs>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 100 in {
|
||||
def: LoadGP_pats <load, L2_loadrigp, f32>;
|
||||
def: LoadGP_pats <load, L2_loadrdgp, f64>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
|
||||
defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
|
||||
}
|
||||
|
||||
// Simple patterns should be tried with the least priority.
|
||||
def: Storex_simple_pat<store, F32, S2_storeri_io>;
|
||||
def: Storex_simple_pat<store, F64, S2_storerd_io>;
|
||||
|
||||
let AddedComplexity = 60 in {
|
||||
defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, f32, store>;
|
||||
defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, f64, store>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 40 in {
|
||||
def: Storexs_pat<store, F32, S4_storeri_rr>;
|
||||
def: Storexs_pat<store, F64, S4_storerd_rr>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
def: Store_rr_pat<store, F32, S4_storeri_rr>;
|
||||
def: Store_rr_pat<store, F64, S4_storerd_rr>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 80 in {
|
||||
def: Storea_pat<store, F32, addrga, S2_storeriabs>;
|
||||
def: Storea_pat<store, F64, addrga, S2_storerdabs>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 100 in {
|
||||
def: Storea_pat<store, F32, addrgp, S2_storerigp>;
|
||||
def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
|
||||
}
|
||||
|
||||
defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
|
||||
defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
|
||||
@@ -148,6 +210,11 @@ let Itinerary = M_tc_3x_SLOT23 in {
|
||||
def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
|
||||
}
|
||||
|
||||
let Predicates = [HasV5T] in {
|
||||
def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>;
|
||||
def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 100, Predicates = [HasV5T] in {
|
||||
class SfSel12<PatFrag Cmp, InstHexagon MI>
|
||||
: Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt),
|
||||
@@ -166,12 +233,14 @@ let AddedComplexity = 100, Predicates = [HasV5T] in {
|
||||
def: SfSel21<setoge, F2_sfmin>;
|
||||
}
|
||||
|
||||
let Itinerary = M_tc_3or4x_SLOT23 in {
|
||||
def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
|
||||
def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
|
||||
}
|
||||
|
||||
// F2_sfrecipa: Reciprocal approximation for division.
|
||||
let isPredicateLate = 1, isFP = 1,
|
||||
hasSideEffects = 0, hasNewValue = 1 in
|
||||
let Uses = [USR], isPredicateLate = 1, isFP = 1,
|
||||
hasSideEffects = 0, hasNewValue = 1, Itinerary = M_tc_3or4x_SLOT23 in
|
||||
def F2_sfrecipa: MInst <
|
||||
(outs IntRegs:$Rd, PredRegs:$Pe),
|
||||
(ins IntRegs:$Rs, IntRegs:$Rt),
|
||||
@@ -193,7 +262,7 @@ def F2_sfrecipa: MInst <
|
||||
}
|
||||
|
||||
// F2_dfcmpeq: Floating point compare for equal.
|
||||
let isCompare = 1, isFP = 1 in
|
||||
let Uses = [USR], isCompare = 1, isFP = 1 in
|
||||
class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
|
||||
list<dag> pattern = [] >
|
||||
: ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2),
|
||||
@@ -484,7 +553,7 @@ let Predicates = [HasV5T] in {
|
||||
}
|
||||
|
||||
// F2 convert template classes:
|
||||
let isFP = 1 in
|
||||
let Uses = [USR], isFP = 1 in
|
||||
class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
|
||||
SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
|
||||
string chop ="">
|
||||
@@ -503,7 +572,7 @@ class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
|
||||
let Inst{4-0} = Rdd;
|
||||
}
|
||||
|
||||
let isFP = 1 in
|
||||
let Uses = [USR], isFP = 1 in
|
||||
class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
|
||||
SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
|
||||
string chop ="">
|
||||
@@ -522,7 +591,7 @@ class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
|
||||
let Inst{4-0} = Rdd;
|
||||
}
|
||||
|
||||
let isFP = 1, hasNewValue = 1 in
|
||||
let Uses = [USR], isFP = 1, hasNewValue = 1 in
|
||||
class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
|
||||
SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
|
||||
string chop ="">
|
||||
@@ -542,7 +611,7 @@ class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
|
||||
let Inst{4-0} = Rd;
|
||||
}
|
||||
|
||||
let isFP = 1, hasNewValue = 1 in
|
||||
let Uses = [USR], isFP = 1, hasNewValue = 1 in
|
||||
class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
|
||||
SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
|
||||
string chop ="">
|
||||
@@ -626,7 +695,7 @@ let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
|
||||
}
|
||||
|
||||
// Fix up radicand.
|
||||
let isFP = 1, hasNewValue = 1 in
|
||||
let Uses = [USR], isFP = 1, hasNewValue = 1 in
|
||||
def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
|
||||
"$Rd = sffixupr($Rs)",
|
||||
[], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
|
||||
@@ -650,12 +719,12 @@ let Predicates = [HasV5T] in {
|
||||
}
|
||||
|
||||
// F2_sffma: Floating-point fused multiply add.
|
||||
let isFP = 1, hasNewValue = 1 in
|
||||
let Uses = [USR], isFP = 1, hasNewValue = 1 in
|
||||
class T_sfmpy_acc <bit isSub, bit isLib>
|
||||
: MInst<(outs IntRegs:$Rx),
|
||||
(ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
|
||||
"$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""),
|
||||
[], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
|
||||
[], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
|
||||
Requires<[HasV5T]> {
|
||||
bits<5> Rx;
|
||||
bits<5> Rs;
|
||||
@@ -681,13 +750,19 @@ def F2_sffms_lib: T_sfmpy_acc <1, 1>;
|
||||
def : Pat <(fma F32:$src2, F32:$src3, F32:$src1),
|
||||
(F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
|
||||
|
||||
def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1),
|
||||
(F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
|
||||
|
||||
def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1),
|
||||
(F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
|
||||
|
||||
// Floating-point fused multiply add w/ additional scaling (2**pu).
|
||||
let isFP = 1, hasNewValue = 1 in
|
||||
let Uses = [USR], isFP = 1, hasNewValue = 1 in
|
||||
def F2_sffma_sc: MInst <
|
||||
(outs IntRegs:$Rx),
|
||||
(ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
|
||||
"$Rx += sfmpy($Rs, $Rt, $Pu):scale" ,
|
||||
[], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
|
||||
[], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
|
||||
Requires<[HasV5T]> {
|
||||
bits<5> Rx;
|
||||
bits<5> Rs;
|
||||
@@ -834,10 +909,10 @@ let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
|
||||
}
|
||||
|
||||
// Classify floating-point value
|
||||
let isFP = 1 in
|
||||
let Uses = [USR], isFP = 1 in
|
||||
def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
|
||||
|
||||
let isFP = 1 in
|
||||
let Uses = [USR], isFP = 1 in
|
||||
def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
|
||||
"$Pd = dfclass($Rss, #$u5)",
|
||||
[], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
|
||||
@@ -858,7 +933,7 @@ def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
|
||||
class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
|
||||
: ALU64Inst<(outs RC:$dst), (ins u10Imm:$src),
|
||||
"$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
|
||||
[], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> {
|
||||
[], "", ALU64_tc_2_SLOT23>, Requires<[HasV5T]> {
|
||||
bits<5> dst;
|
||||
bits<10> src;
|
||||
|
||||
|
@@ -151,6 +151,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
|
||||
Reserved.set(Hexagon::CS0);
|
||||
Reserved.set(Hexagon::CS1);
|
||||
Reserved.set(Hexagon::CS);
|
||||
Reserved.set(Hexagon::USR);
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
|
89
test/CodeGen/Hexagon/float-amode.ll
Normal file
89
test/CodeGen/Hexagon/float-amode.ll
Normal file
@@ -0,0 +1,89 @@
|
||||
; RUN: llc -march=hexagon -fp-contract=fast -disable-hexagon-peephole -disable-hexagon-amodeopt < %s | FileCheck %s
|
||||
|
||||
; The test checks for various addressing modes for floating point loads/stores.
|
||||
|
||||
%struct.matrix_paramsGlob = type { [50 x i8], i16, [50 x float] }
|
||||
%struct.matrix_params = type { [50 x i8], i16, float** }
|
||||
%struct.matrix_params2 = type { i16, [50 x [50 x float]] }
|
||||
|
||||
@globB = common global %struct.matrix_paramsGlob zeroinitializer, align 4
|
||||
@globA = common global %struct.matrix_paramsGlob zeroinitializer, align 4
|
||||
@b = common global float 0.000000e+00, align 4
|
||||
@a = common global float 0.000000e+00, align 4
|
||||
|
||||
; CHECK-LABEL: test1
|
||||
; CHECK: [[REG11:(r[0-9]+)]]{{ *}}={{ *}}memw(r{{[0-9]+}} + r{{[0-9]+}}<<#2)
|
||||
; CHECK: [[REG12:(r[0-9]+)]] += sfmpy({{.*}}[[REG11]]
|
||||
; CHECK: memw(r{{[0-9]+}} + r{{[0-9]+}}<<#2) = [[REG12]].new
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @test1(%struct.matrix_params* nocapture readonly %params, i32 %col1) {
|
||||
entry:
|
||||
%matrixA = getelementptr inbounds %struct.matrix_params, %struct.matrix_params* %params, i32 0, i32 2
|
||||
%0 = load float**, float*** %matrixA, align 4
|
||||
%arrayidx = getelementptr inbounds float*, float** %0, i32 2
|
||||
%1 = load float*, float** %arrayidx, align 4
|
||||
%arrayidx1 = getelementptr inbounds float, float* %1, i32 %col1
|
||||
%2 = load float, float* %arrayidx1, align 4
|
||||
%mul = fmul float %2, 2.000000e+01
|
||||
%add = fadd float %mul, 1.000000e+01
|
||||
%arrayidx3 = getelementptr inbounds float*, float** %0, i32 5
|
||||
%3 = load float*, float** %arrayidx3, align 4
|
||||
%arrayidx4 = getelementptr inbounds float, float* %3, i32 %col1
|
||||
store float %add, float* %arrayidx4, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test2
|
||||
; CHECK: [[REG21:(r[0-9]+)]]{{ *}}={{ *}}memw(##globB+92)
|
||||
; CHECK: [[REG22:(r[0-9]+)]] = sfadd({{.*}}[[REG21]]
|
||||
; CHECK: memw(##globA+84) = [[REG22]]
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @test2(%struct.matrix_params* nocapture readonly %params, i32 %col1) {
|
||||
entry:
|
||||
%matrixA = getelementptr inbounds %struct.matrix_params, %struct.matrix_params* %params, i32 0, i32 2
|
||||
%0 = load float**, float*** %matrixA, align 4
|
||||
%1 = load float*, float** %0, align 4
|
||||
%arrayidx1 = getelementptr inbounds float, float* %1, i32 %col1
|
||||
%2 = load float, float* %arrayidx1, align 4
|
||||
%3 = load float, float* getelementptr inbounds (%struct.matrix_paramsGlob, %struct.matrix_paramsGlob* @globB, i32 0, i32 2, i32 10), align 4
|
||||
%add = fadd float %2, %3
|
||||
store float %add, float* getelementptr inbounds (%struct.matrix_paramsGlob, %struct.matrix_paramsGlob* @globA, i32 0, i32 2, i32 8), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test3
|
||||
; CHECK: [[REG31:(r[0-9]+)]]{{ *}}={{ *}}memw(#b)
|
||||
; CHECK: [[REG32:(r[0-9]+)]] = sfadd({{.*}}[[REG31]]
|
||||
; CHECK: memw(#a) = [[REG32]]
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @test3(%struct.matrix_params* nocapture readonly %params, i32 %col1) {
|
||||
entry:
|
||||
%matrixA = getelementptr inbounds %struct.matrix_params, %struct.matrix_params* %params, i32 0, i32 2
|
||||
%0 = load float**, float*** %matrixA, align 4
|
||||
%1 = load float*, float** %0, align 4
|
||||
%arrayidx1 = getelementptr inbounds float, float* %1, i32 %col1
|
||||
%2 = load float, float* %arrayidx1, align 4
|
||||
%3 = load float, float* @b, align 4
|
||||
%add = fadd float %2, %3
|
||||
store float %add, float* @a, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test4
|
||||
; CHECK: [[REG41:(r[0-9]+)]]{{ *}}={{ *}}memw(r0<<#2 + ##globB+52)
|
||||
; CHECK: [[REG42:(r[0-9]+)]] = sfadd({{.*}}[[REG41]]
|
||||
; CHECK: memw(r0<<#2 + ##globA+60) = [[REG42]]
|
||||
; Function Attrs: noinline norecurse nounwind
|
||||
define void @test4(i32 %col1) {
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds %struct.matrix_paramsGlob, %struct.matrix_paramsGlob* @globB, i32 0, i32 2, i32 %col1
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%add = fadd float %0, 0.000000e+00
|
||||
%add1 = add nsw i32 %col1, 2
|
||||
%arrayidx2 = getelementptr inbounds %struct.matrix_paramsGlob, %struct.matrix_paramsGlob* @globA, i32 0, i32 2, i32 %add1
|
||||
store float %add, float* %arrayidx2, align 4
|
||||
ret void
|
||||
}
|
27
test/CodeGen/Hexagon/fminmax.ll
Normal file
27
test/CodeGen/Hexagon/fminmax.ll
Normal file
@@ -0,0 +1,27 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
target triple = "hexagon"
|
||||
|
||||
; CHECK-LABEL: minimum
|
||||
; CHECK: sfmin
|
||||
define float @minimum(float %x, float %y) #0 {
|
||||
entry:
|
||||
%call = tail call float @fminf(float %x, float %y) #1
|
||||
ret float %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: maximum
|
||||
; CHECK: sfmax
|
||||
define float @maximum(float %x, float %y) #0 {
|
||||
entry:
|
||||
%call = tail call float @fmaxf(float %x, float %y) #1
|
||||
ret float %call
|
||||
}
|
||||
|
||||
declare float @fminf(float, float) #0
|
||||
declare float @fmaxf(float, float) #0
|
||||
|
||||
attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
25
test/CodeGen/Hexagon/sffms.ll
Normal file
25
test/CodeGen/Hexagon/sffms.ll
Normal file
@@ -0,0 +1,25 @@
|
||||
; RUN: llc -march=hexagon -fp-contract=fast < %s | FileCheck %s
|
||||
|
||||
; Check that "Rx-=sfmpy(Rs,Rt)" is being generated for "fsub(fmul(..))"
|
||||
|
||||
; CHECK: r{{[0-9]+}} -= sfmpy
|
||||
|
||||
%struct.matrix_params = type { float** }
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @loop2_1(%struct.matrix_params* nocapture readonly %params, i32 %col1) #0 {
|
||||
entry:
|
||||
%matrixA = getelementptr inbounds %struct.matrix_params, %struct.matrix_params* %params, i32 0, i32 0
|
||||
%0 = load float**, float*** %matrixA, align 4
|
||||
%1 = load float*, float** %0, align 4
|
||||
%arrayidx1 = getelementptr inbounds float, float* %1, i32 %col1
|
||||
%2 = load float, float* %arrayidx1, align 4
|
||||
%arrayidx3 = getelementptr inbounds float*, float** %0, i32 %col1
|
||||
%3 = load float*, float** %arrayidx3, align 4
|
||||
%4 = load float, float* %3, align 4
|
||||
%mul = fmul float %2, %4
|
||||
%sub = fsub float %2, %mul
|
||||
%arrayidx10 = getelementptr inbounds float, float* %3, i32 %col1
|
||||
store float %sub, float* %arrayidx10, align 4
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user