mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
AMDGPU: Implement readcyclecounter
This matches the behavior of the HSAIL clock instruction. s_memrealtime is used if the subtarget supports it, and falls back to s_memtime if not. Also introduces new intrinsics for each of s_memtime / s_memrealtime. llvm-svn: 262119
This commit is contained in:
parent
9bc441714a
commit
1bcd37150d
@ -188,6 +188,10 @@ def int_amdgcn_s_dcache_inv :
|
||||
GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
|
||||
Intrinsic<[], [], []>;
|
||||
|
||||
def int_amdgcn_s_memtime :
|
||||
GCCBuiltin<"__builtin_amdgcn_s_memtime">,
|
||||
Intrinsic<[llvm_i64_ty], [], []>;
|
||||
|
||||
def int_amdgcn_dispatch_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
|
||||
@ -246,4 +250,7 @@ def int_amdgcn_s_dcache_wb_vol :
|
||||
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
|
||||
Intrinsic<[], [], []>;
|
||||
|
||||
def int_amdgcn_s_memrealtime :
|
||||
GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
|
||||
Intrinsic<[llvm_i64_ty], [], []>;
|
||||
}
|
||||
|
@ -149,6 +149,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
|
||||
"Additional intstructions for CI+"
|
||||
>;
|
||||
|
||||
def FeatureVIInsts : SubtargetFeature<"vi-insts",
|
||||
"VIInsts",
|
||||
"true",
|
||||
"Additional intstructions for VI+"
|
||||
>;
|
||||
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
@ -308,7 +314,7 @@ def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||
FeatureGCN3Encoding, FeatureCIInsts]
|
||||
FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -81,7 +81,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
WavefrontSize(0), CFALUBug(false),
|
||||
LocalMemorySize(0), MaxPrivateElementSize(0),
|
||||
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
|
||||
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
|
||||
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), VIInsts(false),
|
||||
LDSBankCount(0),
|
||||
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
|
||||
EnableSIScheduler(false), FrameLowering(nullptr),
|
||||
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
|
||||
|
@ -88,6 +88,7 @@ private:
|
||||
bool GCN1Encoding;
|
||||
bool GCN3Encoding;
|
||||
bool CIInsts;
|
||||
bool VIInsts;
|
||||
bool FeatureDisable;
|
||||
int LDSBankCount;
|
||||
unsigned IsaVersion;
|
||||
|
@ -135,6 +135,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
|
||||
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
|
||||
|
||||
// This is s_memtime on SI and s_memrealtime on VI.
|
||||
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
|
||||
|
||||
for (MVT VT : MVT::integer_valuetypes()) {
|
||||
if (VT == MVT::i64)
|
||||
continue;
|
||||
|
@ -1077,23 +1077,31 @@ multiclass SMRD_m <smrd op, string opName, bit imm, dag outs, dag ins,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass SMRD_Inval <smrd op, string opName,
|
||||
SDPatternOperator node> {
|
||||
let hasSideEffects = 1, mayStore = 1 in {
|
||||
def "" : SMRD_Pseudo <opName, (outs), (ins), [(node)]>;
|
||||
multiclass SMRD_Special <smrd op, string opName, dag outs,
|
||||
string opStr = "",
|
||||
list<dag> pattern = []> {
|
||||
let hasSideEffects = 1 in {
|
||||
def "" : SMRD_Pseudo <opName, outs, (ins), pattern>;
|
||||
|
||||
let sbase = 0, offset = 0 in {
|
||||
let sdst = 0 in {
|
||||
def _si : SMRD_Real_si <op.SI, opName, 0, (outs), (ins), opName>;
|
||||
def _si : SMRD_Real_si <op.SI, opName, 0, outs, (ins), opName#opStr>;
|
||||
}
|
||||
|
||||
let glc = 0, sdata = 0 in {
|
||||
def _vi : SMRD_Real_vi <op.VI, opName, 0, (outs), (ins), opName>;
|
||||
def _vi : SMRD_Real_vi <op.VI, opName, 0, outs, (ins), opName#opStr>;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
multiclass SMRD_Inval <smrd op, string opName,
|
||||
SDPatternOperator node> {
|
||||
let mayStore = 1 in {
|
||||
defm : SMRD_Special<op, opName, (outs), "", [(node)]>;
|
||||
}
|
||||
}
|
||||
|
||||
class SMEM_Inval <bits<8> op, string opName, SDPatternOperator node> :
|
||||
SMRD_Real_vi<op, opName, 0, (outs), (ins), opName, [(node)]> {
|
||||
let hasSideEffects = 1;
|
||||
@ -1104,6 +1112,18 @@ class SMEM_Inval <bits<8> op, string opName, SDPatternOperator node> :
|
||||
let offset = 0;
|
||||
}
|
||||
|
||||
class SMEM_Ret <bits<8> op, string opName, SDPatternOperator node> :
|
||||
SMRD_Real_vi<op, opName, 0, (outs SReg_64:$dst), (ins),
|
||||
opName#" $dst", [(set i64:$dst, (node))]> {
|
||||
let hasSideEffects = 1;
|
||||
let mayStore = ?;
|
||||
let mayLoad = ?;
|
||||
let sbase = 0;
|
||||
let sdata = 0;
|
||||
let glc = 0;
|
||||
let offset = 0;
|
||||
}
|
||||
|
||||
multiclass SMRD_Helper <smrd op, string opName, RegisterClass baseClass,
|
||||
RegisterClass dstClass> {
|
||||
defm _IMM : SMRD_m <
|
||||
|
@ -88,7 +88,15 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
|
||||
smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
|
||||
>;
|
||||
|
||||
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
|
||||
let mayStore = ? in {
|
||||
// FIXME: mayStore = ? is a workaround for tablegen bug for different
|
||||
// inferred mayStore flags for the instruction pattern vs. standalone
|
||||
// Pat. Each considers the other contradictory.
|
||||
|
||||
defm S_MEMTIME : SMRD_Special <smrd<0x1e, 0x24>, "s_memtime",
|
||||
(outs SReg_64:$dst), " $dst", [(set i64:$dst, (int_amdgcn_s_memtime))]
|
||||
>;
|
||||
}
|
||||
|
||||
defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
|
||||
int_amdgcn_s_dcache_inv>;
|
||||
@ -3151,6 +3159,13 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
|
||||
|
||||
def : BFEPattern <V_BFE_U32, S_MOV_B32>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
def : Pat <
|
||||
(i64 (readcyclecounter)),
|
||||
(S_MEMTIME)
|
||||
>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Fract Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -103,6 +103,9 @@ def S_DCACHE_WB : SMEM_Inval <0x21,
|
||||
def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
|
||||
"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
|
||||
|
||||
def S_MEMREALTIME : SMEM_Ret<0x25,
|
||||
"s_memrealtime", int_amdgcn_s_memrealtime>;
|
||||
|
||||
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
@ -114,7 +117,7 @@ def : Pat <
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DPP Paterns
|
||||
// DPP Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat <
|
||||
@ -124,4 +127,13 @@ def : Pat <
|
||||
(as_i32imm $bank_mask), (as_i32imm $row_mask))
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Misc Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat <
|
||||
(i64 (readcyclecounter)),
|
||||
(S_MEMREALTIME)
|
||||
>;
|
||||
|
||||
} // End Predicates = [isVI]
|
||||
|
22
test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
Normal file
22
test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
declare i64 @llvm.amdgcn.s.memrealtime() #0
|
||||
|
||||
; GCN-LABEL: {{^}}test_s_memrealtime:
|
||||
; GCN-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN-DAG: s_load_dwordx2
|
||||
; GCN: lgkmcnt
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN-NOT: lgkmcnt
|
||||
; GCN: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: buffer_store_dwordx2
|
||||
define void @test_s_memrealtime(i64 addrspace(1)* %out) #0 {
|
||||
%cycle0 = call i64 @llvm.amdgcn.s.memrealtime()
|
||||
store volatile i64 %cycle0, i64 addrspace(1)* %out
|
||||
|
||||
%cycle1 = call i64 @llvm.amdgcn.s.memrealtime()
|
||||
store volatile i64 %cycle1, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
23
test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
Normal file
23
test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
declare i64 @llvm.amdgcn.s.memtime() #0
|
||||
|
||||
; GCN-LABEL: {{^}}test_s_memtime:
|
||||
; GCN-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN-DAG: s_load_dwordx2
|
||||
; GCN: lgkmcnt
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN-NOT: lgkmcnt
|
||||
; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: buffer_store_dwordx2
|
||||
define void @test_s_memtime(i64 addrspace(1)* %out) #0 {
|
||||
%cycle0 = call i64 @llvm.amdgcn.s.memtime()
|
||||
store volatile i64 %cycle0, i64 addrspace(1)* %out
|
||||
|
||||
%cycle1 = call i64 @llvm.amdgcn.s.memtime()
|
||||
store volatile i64 %cycle1, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
25
test/CodeGen/AMDGPU/readcyclecounter.ll
Normal file
25
test/CodeGen/AMDGPU/readcyclecounter.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
declare i64 @llvm.readcyclecounter() #0
|
||||
|
||||
; GCN-LABEL: {{^}}test_readcyclecounter:
|
||||
; SI-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; VI-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN-DAG: s_load_dwordx2
|
||||
; GCN: lgkmcnt
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN-NOT: lgkmcnt
|
||||
; SI: s_memtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; VI: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: buffer_store_dwordx2
|
||||
define void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
|
||||
%cycle0 = call i64 @llvm.readcyclecounter()
|
||||
store volatile i64 %cycle0, i64 addrspace(1)* %out
|
||||
|
||||
%cycle1 = call i64 @llvm.readcyclecounter()
|
||||
store volatile i64 %cycle1, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -67,3 +67,6 @@ s_dcache_inv
|
||||
s_dcache_inv_vol
|
||||
// CI: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7]
|
||||
// NOSI: error: instruction not supported on this GPU
|
||||
|
||||
s_memtime s[0:1]
|
||||
// GCN: s_memtime s[0:1] ; encoding: [0x00,0x00,0x80,0xc7]
|
||||
|
Loading…
Reference in New Issue
Block a user