1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[RISCV] Add new SchedRead SchedWrite

The patch fixes some typos and introduces ReadFMemBase, ReadFSGNJ32,
ReadFSGNJ64, WriteFSGNJ32, WriteFSGNJ64, ReadFMinMax32, ReadFMinMax64,
WriteFMinMax32, and WriteFMinMax64, so that target CPUs with different
pipeline models can use them to describe latency.

Differential Revision: https://reviews.llvm.org/D75515
This commit is contained in:
Shiva Chen 2020-02-24 14:21:26 +08:00 committed by Shiva Chen
parent 4e47211fdc
commit e9661f6b8f
6 changed files with 76 additions and 30 deletions

View File

@ -331,7 +331,7 @@ class BranchCC_rri<bits<3> funct3, string opcodestr>
: RVInstB<funct3, OPC_BRANCH, (outs),
(ins GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12),
opcodestr, "$rs1, $rs2, $imm12">,
Sched<[WriteJmp]> {
Sched<[WriteJmp, ReadJmp, ReadJmp]> {
let isBranch = 1;
let isTerminator = 1;
}

View File

@ -42,15 +42,13 @@ class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd),
(ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
(ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUD_rr_frm<bits<7> funct7, string opcodestr>
: RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd),
(ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr,
"$rd, $rs1, $rs2, $funct3">,
Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
"$rd, $rs1, $rs2, $funct3">;
class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
@ -72,7 +70,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
(ins GPR:$rs1, simm12:$imm12),
"fld", "$rd, ${imm12}(${rs1})">,
Sched<[WriteFLD64, ReadMemBase]>;
Sched<[WriteFLD64, ReadFMemBase]>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
@ -81,7 +79,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def FSD : RVInstS<0b011, OPC_STORE_FP, (outs),
(ins FPR64:$rs2, GPR:$rs1, simm12:$imm12),
"fsd", "$rs2, ${imm12}(${rs1})">,
Sched<[WriteFST64, ReadStoreData, ReadMemBase]>;
Sched<[WriteFST64, ReadStoreData, ReadFMemBase]>;
def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">,
Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
@ -96,26 +94,35 @@ def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">,
Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
def : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">;
def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">;
def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">,
Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
def : FPALUDDynFrmAlias<FADD_D, "fadd.d">;
def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">;
def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">,
Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
def : FPALUDDynFrmAlias<FSUB_D, "fsub.d">;
def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">;
def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">,
Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>;
def : FPALUDDynFrmAlias<FMUL_D, "fmul.d">;
def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">;
def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">,
Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>;
def : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">;
def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">,
Sched<[WriteFSqrt32, ReadFSqrt32]> {
Sched<[WriteFSqrt64, ReadFSqrt64]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">;
def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">;
def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">;
def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">;
def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">;
def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">,
Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">,
Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">,
Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">,
Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>;
def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">,
Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>;
def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">,
Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> {

View File

@ -60,8 +60,7 @@ class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd),
(ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
(ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUS_rr_frm<bits<7> funct7, string opcodestr>
@ -106,7 +105,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
(ins GPR:$rs1, simm12:$imm12),
"flw", "$rd, ${imm12}(${rs1})">,
Sched<[WriteFLD32, ReadMemBase]>;
Sched<[WriteFLD32, ReadFMemBase]>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
@ -115,7 +114,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def FSW : RVInstS<0b010, OPC_STORE_FP, (outs),
(ins FPR32:$rs2, GPR:$rs1, simm12:$imm12),
"fsw", "$rs2, ${imm12}(${rs1})">,
Sched<[WriteFST32, ReadStoreData, ReadMemBase]>;
Sched<[WriteFST32, ReadStoreData, ReadFMemBase]>;
def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">,
Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>;
@ -149,11 +148,16 @@ def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">,
}
def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>;
def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">;
def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">;
def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">;
def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">;
def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">;
def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">,
Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">,
Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">,
Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">,
Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>;
def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">,
Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>;
def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">,
Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {

View File

@ -88,10 +88,18 @@ def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>;
def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>;
// Most FP single precision operations are 4 cycles
def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]> { let Latency = 4; }
let Latency = 4 in {
def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]>;
def : WriteRes<WriteFSGNJ32, [Rocket32UnitFPALU]>;
def : WriteRes<WriteFMinMax32, [Rocket32UnitFPALU]>;
}
// Most FP double precision operations are 6 cycles
def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]> { let Latency = 6; }
let Latency = 6 in {
def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]>;
def : WriteRes<WriteFSGNJ64, [Rocket32UnitFPALU]>;
def : WriteRes<WriteFMinMax64, [Rocket32UnitFPALU]>;
}
let Latency = 2 in {
def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>;
@ -180,6 +188,7 @@ def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFALU32, 0>;
def : ReadAdvance<ReadFALU64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
@ -194,6 +203,10 @@ def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
def : ReadAdvance<ReadFSGNJ32, 0>;
def : ReadAdvance<ReadFSGNJ64, 0>;
def : ReadAdvance<ReadFMinMax32, 0>;
def : ReadAdvance<ReadFMinMax64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;

View File

@ -101,10 +101,18 @@ def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>;
def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>;
// Most FP single precision operations are 4 cycles
def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]> { let Latency = 4; }
let Latency = 4 in {
def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]>;
def : WriteRes<WriteFSGNJ32, [Rocket64UnitFPALU]>;
def : WriteRes<WriteFMinMax32, [Rocket64UnitFPALU]>;
}
let Latency = 6 in {
// Most FP double precision operations are 6 cycles
def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]>;
def : WriteRes<WriteFSGNJ64, [Rocket64UnitFPALU]>;
def : WriteRes<WriteFMinMax64, [Rocket64UnitFPALU]>;
}
// Conversion instructions
let Latency = 2 in {
@ -181,6 +189,7 @@ def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFALU32, 0>;
def : ReadAdvance<ReadFALU64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
@ -195,6 +204,10 @@ def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
def : ReadAdvance<ReadFSGNJ32, 0>;
def : ReadAdvance<ReadFSGNJ64, 0>;
def : ReadAdvance<ReadFMinMax32, 0>;
def : ReadAdvance<ReadFMinMax64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;

View File

@ -71,6 +71,10 @@ def WriteFClass32 : SchedWrite; // 32-bit floating point classify
def WriteFClass64 : SchedWrite; // 64-bit floating point classify
def WriteFCmp32 : SchedWrite; // 32-bit floating point compare
def WriteFCmp64 : SchedWrite; // 64-bit floating point compare
def WriteFSGNJ32 : SchedWrite; // 32-bit floating point sign-injection
def WriteFSGNJ64 : SchedWrite; // 64-bit floating point sign-injection
def WriteFMinMax32 : SchedWrite; // 32-bit floating point min or max
def WriteFMinMax64 : SchedWrite; // 64-bit floating point min or max
def WriteFMovF32ToI32 : SchedWrite;
def WriteFMovI32ToF32 : SchedWrite;
@ -89,6 +93,7 @@ def ReadJmp : SchedRead;
def ReadJalr : SchedRead;
def ReadCSR : SchedRead;
def ReadMemBase : SchedRead;
def ReadFMemBase : SchedRead;
def ReadStoreData : SchedRead;
def ReadIALU : SchedRead;
def ReadIALU32 : SchedRead; // 32-bit integer ALU operations on RV64I
@ -120,6 +125,10 @@ def ReadFSqrt32 : SchedRead; // 32-bit floating point sqrt
def ReadFSqrt64 : SchedRead; // 64-bit floating point sqrt
def ReadFCmp32 : SchedRead;
def ReadFCmp64 : SchedRead;
def ReadFSGNJ32 : SchedRead;
def ReadFSGNJ64 : SchedRead;
def ReadFMinMax32 : SchedRead;
def ReadFMinMax64 : SchedRead;
def ReadFCvtF32ToI32 : SchedRead;
def ReadFCvtF32ToI64 : SchedRead;
def ReadFCvtF64ToI32 : SchedRead;