mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[ARM] Classification Improvements to ARM Sched-Models. NFCI.
This is a series of patches to make adding machine sched models for ARM processors easier and more compact. They define new sched-readwrites for groups of ARM instructions. These have been missing so far, and as a consequence, machine scheduler models for individual sub-targets have tended to be larger than they needed to be. The current patch focuses on floating-point instructions. Reviewers: Diana Picus (rovka), Renato Golin (rengolin). Differential Revision: https://reviews.llvm.org/D28194 llvm-svn: 292825
This commit is contained in:
parent
9711c6c21b
commit
97ef1a63f1
@ -336,13 +336,15 @@ let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
||||
def VADDD : ADbI<0b11100, 0b11, 0, 0,
|
||||
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
||||
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
|
||||
[(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
|
||||
[(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPALU64]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
|
||||
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
|
||||
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
|
||||
Sched<[WriteFPALU32]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -352,19 +354,22 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VADDH : AHbI<0b11100, 0b11, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>;
|
||||
[]>,
|
||||
Sched<[WriteFPALU32]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
||||
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
|
||||
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
||||
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
|
||||
[(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
|
||||
[(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPALU64]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
|
||||
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
|
||||
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
|
||||
Sched<[WriteFPALU32]>{
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -374,37 +379,43 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>;
|
||||
[]>,
|
||||
Sched<[WriteFPALU32]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
||||
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
|
||||
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
||||
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
|
||||
[(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
|
||||
[(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPDIV64]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
|
||||
[(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
|
||||
[(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
|
||||
Sched<[WriteFPDIV32]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>;
|
||||
[]>,
|
||||
Sched<[WriteFPDIV32]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
||||
def VMULD : ADbI<0b11100, 0b10, 0, 0,
|
||||
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
||||
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
|
||||
[(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
|
||||
[(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
|
||||
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
|
||||
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
|
||||
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -414,17 +425,20 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
||||
def VMULH : AHbI<0b11100, 0b10, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>;
|
||||
[]>,
|
||||
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
|
||||
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
||||
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
|
||||
[(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
|
||||
[(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
|
||||
Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
|
||||
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
|
||||
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
|
||||
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -433,7 +447,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
|
||||
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>;
|
||||
[]>,
|
||||
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
multiclass vsel_inst<string op, bits<2> opc, int CC> {
|
||||
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
|
||||
@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
||||
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
|
||||
(outs DPR:$Dd), (ins SPR:$Sm),
|
||||
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
|
||||
[(set DPR:$Dd, (fpextend SPR:$Sm))]> {
|
||||
[(set DPR:$Dd, (fpextend SPR:$Sm))]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Instruction operands.
|
||||
bits<5> Dd;
|
||||
bits<5> Sm;
|
||||
@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
|
||||
// Special case encoding: bits 11-8 is 0b1011.
|
||||
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
|
||||
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
|
||||
[(set SPR:$Sd, (fpround DPR:$Dm))]> {
|
||||
[(set SPR:$Sd, (fpround DPR:$Dm))]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Instruction operands.
|
||||
bits<5> Sd;
|
||||
bits<5> Dm;
|
||||
@ -667,27 +684,32 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
|
||||
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
||||
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
|
||||
[/* For disassembly only; pattern left blank */]>,
|
||||
Requires<[HasFP16]>;
|
||||
Requires<[HasFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
||||
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
|
||||
[/* For disassembly only; pattern left blank */]>,
|
||||
Requires<[HasFP16]>;
|
||||
Requires<[HasFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
||||
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
|
||||
[/* For disassembly only; pattern left blank */]>,
|
||||
Requires<[HasFP16]>;
|
||||
Requires<[HasFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
||||
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
|
||||
[/* For disassembly only; pattern left blank */]>,
|
||||
Requires<[HasFP16]>;
|
||||
Requires<[HasFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
|
||||
(outs DPR:$Dd), (ins SPR:$Sm),
|
||||
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
|
||||
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
|
||||
[]>, Requires<[HasFPARMv8, HasDPVFP]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Instruction operands.
|
||||
bits<5> Sm;
|
||||
|
||||
@ -946,12 +968,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
|
||||
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
||||
(outs DPR:$Dd), (ins DPR:$Dm),
|
||||
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
|
||||
[(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
|
||||
[(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPSQRT64]>;
|
||||
|
||||
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
|
||||
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
|
||||
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
|
||||
Sched<[WriteFPSQRT32]>;
|
||||
|
||||
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
@ -987,7 +1011,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
|
||||
def VMOVRS : AVConv2I<0b11100001, 0b1010,
|
||||
(outs GPR:$Rt), (ins SPR:$Sn),
|
||||
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
|
||||
[(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
|
||||
[(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<4> Rt;
|
||||
bits<5> Sn;
|
||||
@ -1010,7 +1035,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
|
||||
(outs SPR:$Sn), (ins GPR:$Rt),
|
||||
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
|
||||
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
|
||||
Requires<[HasVFP2, UseVMOVSR]> {
|
||||
Requires<[HasVFP2, UseVMOVSR]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<5> Sn;
|
||||
bits<4> Rt;
|
||||
@ -1032,7 +1058,8 @@ let hasSideEffects = 0 in {
|
||||
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
|
||||
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
|
||||
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
|
||||
[/* FIXME: Can't write pattern for multiple result instr*/]> {
|
||||
[/* FIXME: Can't write pattern for multiple result instr*/]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<5> Dm;
|
||||
bits<4> Rt;
|
||||
@ -1059,7 +1086,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
|
||||
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
|
||||
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
|
||||
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
[/* For disassembly only; pattern left blank */]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
bits<5> src1;
|
||||
bits<4> Rt;
|
||||
bits<4> Rt2;
|
||||
@ -1085,7 +1113,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
|
||||
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
|
||||
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
|
||||
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
|
||||
[(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
|
||||
[(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<5> Dm;
|
||||
bits<4> Rt;
|
||||
@ -1128,7 +1157,8 @@ let hasSideEffects = 0 in
|
||||
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
|
||||
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
|
||||
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
|
||||
[/* For disassembly only; pattern left blank */]> {
|
||||
[/* For disassembly only; pattern left blank */]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<5> dst1;
|
||||
bits<4> src1;
|
||||
@ -1154,7 +1184,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
|
||||
(outs GPR:$Rt), (ins SPR:$Sn),
|
||||
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
|
||||
[]>,
|
||||
Requires<[HasFullFP16]> {
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<4> Rt;
|
||||
bits<5> Sn;
|
||||
@ -1173,7 +1204,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
|
||||
(outs SPR:$Sn), (ins GPR:$Rt),
|
||||
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
|
||||
[]>,
|
||||
Requires<[HasFullFP16]> {
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPMOV]> {
|
||||
// Instruction operands.
|
||||
bits<5> Sn;
|
||||
bits<4> Rt;
|
||||
@ -1254,7 +1286,8 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
|
||||
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
|
||||
(outs DPR:$Dd), (ins SPR:$Sm),
|
||||
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // s32
|
||||
}
|
||||
|
||||
@ -1269,7 +1302,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
|
||||
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
|
||||
(outs SPR:$Sd),(ins SPR:$Sm),
|
||||
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // s32
|
||||
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
@ -1286,14 +1320,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
|
||||
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // s32
|
||||
}
|
||||
|
||||
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
|
||||
(outs DPR:$Dd), (ins SPR:$Sm),
|
||||
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // u32
|
||||
}
|
||||
|
||||
@ -1308,7 +1344,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
|
||||
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // u32
|
||||
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
@ -1325,7 +1362,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
|
||||
def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // u32
|
||||
}
|
||||
|
||||
@ -1390,7 +1428,8 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
|
||||
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
|
||||
(outs SPR:$Sd), (ins DPR:$Dm),
|
||||
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // Z bit
|
||||
}
|
||||
|
||||
@ -1405,7 +1444,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
|
||||
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // Z bit
|
||||
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
@ -1423,14 +1463,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
|
||||
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // Z bit
|
||||
}
|
||||
|
||||
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
|
||||
(outs SPR:$Sd), (ins DPR:$Dm),
|
||||
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // Z bit
|
||||
}
|
||||
|
||||
@ -1445,7 +1487,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
|
||||
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // Z bit
|
||||
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
@ -1463,7 +1506,8 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
|
||||
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 1; // Z bit
|
||||
}
|
||||
|
||||
@ -1473,42 +1517,48 @@ let Uses = [FPSCR] in {
|
||||
def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
|
||||
(outs SPR:$Sd), (ins DPR:$Dm),
|
||||
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
|
||||
[(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
|
||||
[(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
|
||||
[(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
|
||||
[(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
|
||||
(outs SPR:$Sd), (ins DPR:$Dm),
|
||||
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
|
||||
[(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
|
||||
[(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
|
||||
[(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
|
||||
[(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
|
||||
def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
|
||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||
IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
|
||||
[]> {
|
||||
[]>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
let Inst{7} = 0; // Z bit
|
||||
}
|
||||
}
|
||||
@ -1528,8 +1578,7 @@ let Constraints = "$a = $dst" in {
|
||||
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
|
||||
bit op5, dag oops, dag iops, InstrItinClass itin,
|
||||
string opc, string asm, list<dag> pattern>
|
||||
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
|
||||
Sched<[WriteCvtFP]> {
|
||||
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
|
||||
bits<5> dst;
|
||||
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
|
||||
let Inst{22} = dst{0};
|
||||
@ -1540,8 +1589,7 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
|
||||
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
|
||||
bit op5, dag oops, dag iops, InstrItinClass itin,
|
||||
string opc, string asm, list<dag> pattern>
|
||||
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
|
||||
Sched<[WriteCvtFP]> {
|
||||
: AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
|
||||
bits<5> dst;
|
||||
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
|
||||
let Inst{22} = dst{4};
|
||||
@ -1553,26 +1601,31 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
|
||||
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
|
||||
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1604,45 +1657,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
|
||||
|
||||
def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
// Fixed-Point to FP:
|
||||
|
||||
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
|
||||
Requires<[HasFullFP16]>;
|
||||
Requires<[HasFullFP16]>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
|
||||
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1650,7 +1712,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
|
||||
|
||||
def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
|
||||
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1658,7 +1721,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
|
||||
|
||||
def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
|
||||
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1666,7 +1730,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
|
||||
|
||||
def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
|
||||
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
|
||||
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1674,19 +1739,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
|
||||
|
||||
def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
||||
IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
|
||||
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
||||
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
|
||||
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
|
||||
Sched<[WriteFPCVT]>;
|
||||
|
||||
} // End of 'let Constraints = "$a = $dst" in'
|
||||
|
||||
@ -1700,7 +1769,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
|
||||
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
||||
(f64 DPR:$Ddin)))]>,
|
||||
RegConstraint<"$Ddin = $Dd">,
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||
@ -1708,7 +1778,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
|
||||
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
|
||||
SPR:$Sdin))]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1734,7 +1805,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
|
||||
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
||||
(f64 DPR:$Ddin)))]>,
|
||||
RegConstraint<"$Ddin = $Dd">,
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||
@ -1742,7 +1814,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
|
||||
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
||||
SPR:$Sdin))]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1768,7 +1841,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
|
||||
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
||||
(f64 DPR:$Ddin)))]>,
|
||||
RegConstraint<"$Ddin = $Dd">,
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||
@ -1776,7 +1850,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
|
||||
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
||||
SPR:$Sdin))]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1802,14 +1877,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
|
||||
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
||||
(f64 DPR:$Ddin)))]>,
|
||||
RegConstraint<"$Ddin = $Dd">,
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
||||
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
|
||||
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
|
||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines on A8.
|
||||
let D = VFPNeonA8Domain;
|
||||
@ -1838,7 +1915,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
|
||||
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
||||
(f64 DPR:$Ddin)))]>,
|
||||
RegConstraint<"$Ddin = $Dd">,
|
||||
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
|
||||
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
||||
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
|
||||
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
||||
@ -1846,7 +1924,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
|
||||
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
|
||||
SPR:$Sdin))]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
|
||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
||||
// Some single precision VFP instructions may be executed on both NEON and
|
||||
// VFP pipelines.
|
||||
}
|
||||
@ -1856,7 +1935,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0,
|
||||
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
|
||||
[]>,
|
||||
RegConstraint<"$Sdin = $Sd">,
|
||||
Requires<[HasFullFP16,UseFusedMAC]>;
|
||||
Requires<[HasFullFP16,UseFusedMAC]>,
|
||||
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
||||
|
||||
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
||||
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
||||
|
@ -7,7 +7,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction scheduling annotations for out-of-order CPUs.
|
||||
// Instruction scheduling annotations for in-order and out-of-order CPUs.
|
||||
// These annotations are independent of the itinerary class defined below.
|
||||
// Here we define the subtarget independent read/write per-operand resources.
|
||||
// The subtarget schedule definitions will then map these to the subtarget's
|
||||
@ -54,6 +54,9 @@
|
||||
// }
|
||||
// def : ReadAdvance<ReadAdvanceALUsr, 3>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Sched definitions for integer pipeline instructions
|
||||
//
|
||||
// Basic ALU operation.
|
||||
def WriteALU : SchedWrite;
|
||||
def ReadALU : SchedRead;
|
||||
@ -81,12 +84,38 @@ def WriteBr : SchedWrite;
|
||||
def WriteBrL : SchedWrite;
|
||||
def WriteBrTbl : SchedWrite;
|
||||
|
||||
// Fixpoint conversions.
|
||||
def WriteCvtFP : SchedWrite;
|
||||
|
||||
// Noop.
|
||||
def WriteNoop : SchedWrite;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Sched definitions for floating-point and neon instructions
|
||||
//
|
||||
// Floating point conversions
|
||||
def WriteFPCVT : SchedWrite;
|
||||
def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
|
||||
|
||||
// ALU operations (32/64-bit)
|
||||
def WriteFPALU32 : SchedWrite;
|
||||
def WriteFPALU64 : SchedWrite;
|
||||
|
||||
// Multiplication
|
||||
def WriteFPMUL32 : SchedWrite;
|
||||
def WriteFPMUL64 : SchedWrite;
|
||||
def ReadFPMUL : SchedRead; // multiplier read
|
||||
def ReadFPMAC : SchedRead; // accumulator read
|
||||
|
||||
// Multiply-accumulate
|
||||
def WriteFPMAC32 : SchedWrite;
|
||||
def WriteFPMAC64 : SchedWrite;
|
||||
|
||||
// Division
|
||||
def WriteFPDIV32 : SchedWrite;
|
||||
def WriteFPDIV64 : SchedWrite;
|
||||
|
||||
// Square-root
|
||||
def WriteFPSQRT32 : SchedWrite;
|
||||
def WriteFPSQRT64 : SchedWrite;
|
||||
|
||||
// Define TII for use in SchedVariant Predicates.
|
||||
def : PredicateProlog<[{
|
||||
const ARMBaseInstrInfo *TII =
|
||||
|
@ -2471,6 +2471,33 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>;
|
||||
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
|
||||
def : SchedAlias<ReadALU, A9ReadALU>;
|
||||
def : SchedAlias<ReadALUsr, A9ReadALU>;
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// Floating-point. Map target-defined SchedReadWrite to processor-specific ones
|
||||
//
|
||||
def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
|
||||
def : SchedAlias<WriteFPMOV, A9WriteFMov>;
|
||||
|
||||
def : SchedAlias<WriteFPALU32, A9WriteF>;
|
||||
def : SchedAlias<WriteFPALU64, A9WriteF>;
|
||||
|
||||
def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
|
||||
def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
|
||||
|
||||
def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
|
||||
def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
|
||||
|
||||
def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
|
||||
def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
|
||||
def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
|
||||
def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
|
||||
|
||||
def : ReadAdvance<ReadFPMUL, 0>;
|
||||
def : ReadAdvance<ReadFPMAC, 0>;
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
|
||||
//
|
||||
def : InstRW< [WriteALU],
|
||||
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
|
||||
"BICrr")>;
|
||||
@ -2524,6 +2551,5 @@ def : WriteRes<WriteBr, [A9UnitB]>;
|
||||
def : WriteRes<WriteBrL, [A9UnitB]>;
|
||||
def : WriteRes<WriteBrTbl, [A9UnitB]>;
|
||||
def : WriteRes<WritePreLd, []>;
|
||||
def : SchedAlias<WriteCvtFP, A9WriteF>;
|
||||
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
|
||||
} // SchedModel = CortexA9Model
|
||||
|
@ -86,12 +86,45 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
|
||||
|
||||
// Misc
|
||||
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
|
||||
def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
|
||||
|
||||
// Integer pipeline by-passes
|
||||
def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
|
||||
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
|
||||
|
||||
|
||||
// Floating-point. Map target-defined SchedReadWrites to subtarget resources.
|
||||
def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
|
||||
|
||||
def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
|
||||
let Latency = 6;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
|
||||
let Latency = 11; // as it is internally two insns (MUL then ADD)
|
||||
}
|
||||
|
||||
def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
|
||||
R52UnitFPALU, R52UnitFPALU]> {
|
||||
let Latency = 11;
|
||||
}
|
||||
|
||||
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
|
||||
let Latency = 7; // FP div takes fixed #cycles
|
||||
let ResourceCycles = [7]; // is not pipelined
|
||||
}
|
||||
|
||||
def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
|
||||
let Latency = 17;
|
||||
let ResourceCycles = [17];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
|
||||
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
|
||||
|
||||
def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
|
||||
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific SchedReadWrites.
|
||||
|
||||
@ -147,19 +180,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
|
||||
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
|
||||
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
|
||||
|
||||
def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
|
||||
let Latency = 7; // FP div takes fixed #cycles
|
||||
let ResourceCycles = [7]; // is not pipelined
|
||||
}
|
||||
def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
|
||||
let Latency = 17;
|
||||
let ResourceCycles = [17];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating-point. Map target-defined SchedReadWrites to processor-specific ones
|
||||
//
|
||||
def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
|
||||
def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
|
||||
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
|
||||
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific - map operands to SchedReadWrites
|
||||
|
||||
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
|
||||
//
|
||||
def : InstRW<[WriteALU], (instrs COPY)>;
|
||||
|
||||
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
|
||||
@ -492,12 +523,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VAC
|
||||
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
|
||||
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
|
||||
|
||||
def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
|
||||
def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
|
||||
|
||||
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
|
||||
(instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
|
||||
|
||||
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
|
||||
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
|
||||
|
||||
@ -777,9 +802,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHS
|
||||
|
||||
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
|
||||
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
|
||||
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
|
||||
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
|
||||
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
|
||||
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
|
||||
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
|
||||
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
|
||||
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
|
||||
|
@ -597,8 +597,6 @@ let SchedModel = SwiftModel in {
|
||||
def : InstRW<[SwiftWriteP1FourCycle],
|
||||
(instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
|
||||
"VMULL", "VQDMULL")>;
|
||||
def : InstRW<[SwiftWriteP1SixCycle],
|
||||
(instregex "VMULD", "VNMULD")>;
|
||||
def : InstRW<[SwiftWriteP1FourCycle],
|
||||
(instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
|
||||
"VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
|
||||
@ -607,8 +605,6 @@ let SchedModel = SwiftModel in {
|
||||
|
||||
// 4.2.36 Advanced SIMD and VFP, Convert
|
||||
def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
|
||||
// Fixpoint conversions.
|
||||
def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
|
||||
|
||||
// 4.2.37 Advanced SIMD and VFP, Move
|
||||
def : InstRW<[SwiftWriteP0TwoCycle],
|
||||
@ -1036,6 +1032,30 @@ let SchedModel = SwiftModel in {
|
||||
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
|
||||
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// Floating-point. Map target-defined SchedReadWrite to processor-specific ones
|
||||
//
|
||||
def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>;
|
||||
def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>;
|
||||
|
||||
def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>;
|
||||
def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>;
|
||||
|
||||
def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>;
|
||||
def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>;
|
||||
|
||||
def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>;
|
||||
def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>;
|
||||
|
||||
def : SchedAlias<WriteFPDIV32, SwiftDiv17>;
|
||||
def : SchedAlias<WriteFPSQRT32, SwiftDiv17>;
|
||||
|
||||
def : SchedAlias<WriteFPDIV64, SwiftDiv32>;
|
||||
def : SchedAlias<WriteFPSQRT64, SwiftDiv32>;
|
||||
|
||||
def : ReadAdvance<ReadFPMUL, 0>;
|
||||
def : ReadAdvance<ReadFPMAC, 0>;
|
||||
|
||||
// Not specified.
|
||||
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
|
||||
// Preload.
|
||||
|
69
test/CodeGen/ARM/misched-fp-basic.ll
Normal file
69
test/CodeGen/ARM/misched-fp-basic.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
|
||||
; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
|
||||
; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
|
||||
; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
|
||||
; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
|
||||
; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
|
||||
;
|
||||
; Check the latency of instructions for processors with sched-models
|
||||
;
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define i32 @foo(float %a, float %b, float %c, i32 %d) local_unnamed_addr #0 {
|
||||
entry:
|
||||
;
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK_A9: VADDS
|
||||
; CHECK_SWIFT: VADDfd
|
||||
; CHECK_R52: VADDS
|
||||
; CHECK_A9: Latency : 5
|
||||
; CHECK_SWIFT: Latency : 4
|
||||
; CHECK_R52: Latency : 6
|
||||
;
|
||||
; CHECK_A9: VMULS
|
||||
; CHECK_SWIFT: VMULfd
|
||||
; CHECK_R52: VMULS
|
||||
; CHECK_SWIFT: Latency : 4
|
||||
; CHECK_A9: Latency : 6
|
||||
; CHECK_R52: Latency : 6
|
||||
;
|
||||
; CHECK: VDIVS
|
||||
; CHECK_SWIFT: Latency : 17
|
||||
; CHECK_A9: Latency : 16
|
||||
; CHECK_R52: Latency : 7
|
||||
;
|
||||
; CHECK: VCVTDS
|
||||
; CHECK_SWIFT: Latency : 4
|
||||
; CHECK_A9: Latency : 5
|
||||
; CHECK_R52: Latency : 6
|
||||
;
|
||||
; CHECK: VADDD
|
||||
; CHECK_SWIFT: Latency : 6
|
||||
; CHECK_A9: Latency : 5
|
||||
; CHECK_R52: Latency : 6
|
||||
;
|
||||
; CHECK: VMULD
|
||||
; CHECK_SWIFT: Latency : 6
|
||||
; CHECK_A9: Latency : 7
|
||||
; CHECK_R52: Latency : 6
|
||||
;
|
||||
; CHECK: VDIVD
|
||||
; CHECK_SWIFT: Latency : 32
|
||||
; CHECK_A9: Latency : 26
|
||||
; CHECK_R52: Latency : 17
|
||||
;
|
||||
; CHECK: VTOSIZD
|
||||
; CHECK_SWIFT: Latency : 4
|
||||
; CHECK_A9: Latency : 5
|
||||
; CHECK_R52: Latency : 6
|
||||
;
|
||||
%add = fadd float %a, %b
|
||||
%mul = fmul float %add, %add
|
||||
%div = fdiv float %mul, %b
|
||||
%conv1 = fpext float %div to double
|
||||
%add3 = fadd double %conv1, %conv1
|
||||
%mul4 = fmul double %add3, %add3
|
||||
%div5 = fdiv double %mul4, %conv1
|
||||
%conv6 = fptosi double %div5 to i32
|
||||
ret i32 %conv6
|
||||
}
|
Loading…
Reference in New Issue
Block a user