mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
d75bc90d2a
This patch adds a post-linking pass which replaces the function pointer of an enqueued block kernel with a global variable (runtime handle) and adds a runtime-handle attribute to the enqueued block kernel. In LLVM CodeGen the runtime-handle metadata will be translated to RuntimeHandle metadata in the code object. The runtime allocates a global buffer for each kernel with RuntimeHandle metadata and saves the kernel address required for the AQL packet into the buffer. The __enqueue_kernel function in the device library knows that the invoke function pointer in the block literal is actually a runtime handle, loads the kernel address from it, and puts it into the AQL packet for dispatching. This cannot be done in the FE since the FE cannot create a unique global variable with external linkage across LLVM modules. A global variable with internal linkage does not work since optimization passes will try to replace loads of the global variable with its initialization value. Differential Revision: https://reviews.llvm.org/D38610 llvm-svn: 315352
106 lines
3.0 KiB
CMake
106 lines
3.0 KiB
CMake
# TableGen outputs for this target.
# LLVM_TARGET_DEFINITIONS names the .td file; tablegen() and
# add_public_tablegen_target() are project macros defined outside this file
# (presumably they consume LLVM_TARGET_DEFINITIONS -- confirm in the LLVM
# CMake modules). Each tablegen() call pairs one generated .inc file with the
# backend flag used to produce it.
set(LLVM_TARGET_DEFINITIONS AMDGPU.td)

tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
add_public_tablegen_target(AMDGPUCommonTableGen)

# Source files that make up the AMDGPUCodeGen target.
# add_llvm_target() is a project macro defined outside this file. Entries are
# listed one per line and kept in their existing order.
add_llvm_target(AMDGPUCodeGen
  AMDGPUAliasAnalysis.cpp
  AMDGPUAlwaysInlinePass.cpp
  AMDGPUAnnotateKernelFeatures.cpp
  AMDGPUAnnotateUniformValues.cpp
  AMDGPUArgumentUsageInfo.cpp
  AMDGPUAsmPrinter.cpp
  AMDGPUCallLowering.cpp
  AMDGPUCodeGenPrepare.cpp
  AMDGPUFrameLowering.cpp
  AMDGPUInstrInfo.cpp
  AMDGPUInstructionSelector.cpp
  AMDGPUIntrinsicInfo.cpp
  AMDGPUISelDAGToDAG.cpp
  AMDGPUISelLowering.cpp
  AMDGPULegalizerInfo.cpp
  AMDGPULibCalls.cpp
  AMDGPULibFunc.cpp
  AMDGPULowerIntrinsics.cpp
  AMDGPUMachineCFGStructurizer.cpp
  AMDGPUMachineFunction.cpp
  AMDGPUMachineModuleInfo.cpp
  AMDGPUMacroFusion.cpp
  AMDGPUMCInstLower.cpp
  AMDGPUOpenCLEnqueuedBlockLowering.cpp
  AMDGPUOpenCLImageTypeLoweringPass.cpp
  AMDGPUPromoteAlloca.cpp
  AMDGPURegAsmNames.inc.cpp
  AMDGPURegisterBankInfo.cpp
  AMDGPURegisterInfo.cpp
  AMDGPURewriteOutArguments.cpp
  AMDGPUSubtarget.cpp
  AMDGPUTargetMachine.cpp
  AMDGPUTargetObjectFile.cpp
  AMDGPUTargetTransformInfo.cpp
  AMDGPUUnifyDivergentExitNodes.cpp
  AMDGPUUnifyMetadata.cpp
  AMDGPUInline.cpp
  AMDILCFGStructurizer.cpp
  GCNHazardRecognizer.cpp
  GCNIterativeScheduler.cpp
  GCNMinRegStrategy.cpp
  GCNRegPressure.cpp
  GCNSchedStrategy.cpp
  R600ClauseMergePass.cpp
  R600ControlFlowFinalizer.cpp
  R600EmitClauseMarkers.cpp
  R600ExpandSpecialInstrs.cpp
  R600FrameLowering.cpp
  R600InstrInfo.cpp
  R600ISelLowering.cpp
  R600MachineFunctionInfo.cpp
  R600MachineScheduler.cpp
  R600OptimizeVectorRegisters.cpp
  R600Packetizer.cpp
  R600RegisterInfo.cpp
  SIAnnotateControlFlow.cpp
  SIDebuggerInsertNops.cpp
  SIFixSGPRCopies.cpp
  SIFixVGPRCopies.cpp
  SIFixWWMLiveness.cpp
  SIFoldOperands.cpp
  SIFrameLowering.cpp
  SIInsertSkips.cpp
  SIInsertWaitcnts.cpp
  SIInsertWaits.cpp
  SIInstrInfo.cpp
  SIISelLowering.cpp
  SILoadStoreOptimizer.cpp
  SILowerControlFlow.cpp
  SILowerI1Copies.cpp
  SIMachineFunctionInfo.cpp
  SIMachineScheduler.cpp
  SIMemoryLegalizer.cpp
  SIOptimizeExecMasking.cpp
  SIOptimizeExecMaskingPreRA.cpp
  SIPeepholeSDWA.cpp
  SIRegisterInfo.cpp
  SIShrinkInstructions.cpp
  SIWholeQuadMode.cpp
)

# Descend into the component subdirectories of this target; each is expected
# to provide its own CMakeLists.txt.
add_subdirectory(AsmParser)
add_subdirectory(InstPrinter)
add_subdirectory(Disassembler)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
add_subdirectory(Utils)