1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[NVPTX] Enable lowering of atomics on local memory

LLVM does not have valid assembly backends for atomicrmw on local memory. However, as this memory is thread local, we should be able to lower this to the relevant load/store.

Differential Revision: https://reviews.llvm.org/D98650
This commit is contained in:
William S. Moses 2021-03-15 14:27:06 -04:00
parent 8d37cf9ea8
commit 9ac62ee58d
7 changed files with 120 additions and 2 deletions

View File

@ -24,6 +24,12 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
static bool isRequired() { return true; }
};
// Forward declaration; only a pointer to the instruction is needed here.
class AtomicRMWInst;
/// Convert the given atomic read-modify-write instruction \p RMWI into
/// primitive (non-atomic) loads and stores. The caller is assumed to have
/// established that doing so is legal for the memory being accessed.
/// \returns true if the lowering succeeds.
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
}
#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H

View File

@ -12,6 +12,7 @@ add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAtomicLower.cpp
NVPTXAsmPrinter.cpp
NVPTXAssignValidGlobalNames.cpp
NVPTXFrameLowering.cpp

View File

@ -0,0 +1,67 @@
//===-- NVPTXAtomicLower.cpp - Lower atomics of local memory ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Lower atomics of local memory to simple load/stores
//
//===----------------------------------------------------------------------===//
#include "NVPTXAtomicLower.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
using namespace llvm;
namespace {

/// Legacy function pass that finds every atomicrmw whose pointer operand
/// lives in the NVPTX local address space and hands it to
/// lowerAtomicRMWInst so that it becomes simple load/stores.
class NVPTXAtomicLower : public FunctionPass {
public:
  /// Pass ID.
  static char ID;

  NVPTXAtomicLower() : FunctionPass(ID) {}

  /// Lower the local-memory atomics of \p F; returns true if any lowering
  /// reported a change.
  bool runOnFunction(Function &F) override;

  /// Human-readable name of this pass.
  StringRef getPassName() const override {
    return "NVPTX lower atomics of local memory";
  }

  /// The pass declares that it preserves the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
/// Lower every atomicrmw in \p F that operates on local memory.
///
/// Candidates are gathered in a first walk over the function and lowered in a
/// second loop, so lowerAtomicRMWInst never runs while the instruction walk
/// is still in progress.
///
/// \returns true if at least one lowering reported a change.
bool NVPTXAtomicLower::runOnFunction(Function &F) {
  SmallVector<AtomicRMWInst *> Worklist;
  for (Instruction &Inst : instructions(F)) {
    auto *Atomic = dyn_cast<AtomicRMWInst>(&Inst);
    if (!Atomic)
      continue;
    // Only atomics on the local address space are handled by this pass.
    if (Atomic->getPointerAddressSpace() != ADDRESS_SPACE_LOCAL)
      continue;
    Worklist.push_back(Atomic);
  }

  bool Modified = false;
  for (AtomicRMWInst *Atomic : Worklist) {
    if (lowerAtomicRMWInst(Atomic))
      Modified = true;
  }
  return Modified;
}
// Definition of the static pass ID member declared in NVPTXAtomicLower.
char NVPTXAtomicLower::ID = 0;
// Declaration of the pass initializer called from LLVMInitializeNVPTXTarget.
// NOTE(review): presumably the definition is emitted by the INITIALIZE_PASS
// macro below -- confirm against llvm/PassSupport.h.
void llvm::initializeNVPTXAtomicLowerPass(PassRegistry &);
// Register the pass under the argument "nvptx-atomic-lower" (the lit test
// runs it as `opt -nvptx-atomic-lower`) together with its description.
// NOTE(review): the two trailing `false` arguments look like the macro's
// CFG-only and is-analysis flags -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS(NVPTXAtomicLower, "nvptx-atomic-lower",
"Lower atomics of local memory to simple load/stores", false,
false)
// Factory for the pass: returns a newly allocated NVPTXAtomicLower.
// NOTE(review): presumably ownership passes to whoever receives the pointer
// (e.g. addPass in NVPTXPassConfig) -- confirm.
FunctionPass *llvm::createNVPTXAtomicLowerPass() {
return new NVPTXAtomicLower();
}

View File

@ -0,0 +1,22 @@
//===-- NVPTXAtomicLower.h - Lower atomics of local memory ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Lower atomics of local memory to simple load/stores
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H

namespace llvm {

class FunctionPass;

/// Create a new instance of the pass that lowers atomics of local memory to
/// simple load/stores.
FunctionPass *createNVPTXAtomicLowerPass();

} // namespace llvm

#endif // LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H

View File

@ -13,6 +13,7 @@
#include "NVPTXTargetMachine.h"
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXAtomicLower.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
@ -65,6 +66,7 @@ void initializeNVVMIntrRangePass(PassRegistry&);
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAtomicLowerPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
@ -86,6 +88,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
initializeGenericToNVVMPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
initializeNVPTXAtomicLowerPass(PR);
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
@ -252,6 +255,7 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
addPass(createInferAddressSpacesPass());
addPass(createNVPTXAtomicLowerPass());
}
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {

View File

@ -40,7 +40,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
return true;
}
static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
IRBuilder<> Builder(RMWI);
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
@ -123,7 +123,7 @@ static bool runOnBasicBlock(BasicBlock &BB) {
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&Inst))
Changed |= LowerAtomicCmpXchgInst(CXI);
else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&Inst))
Changed |= LowerAtomicRMWInst(RMWI);
Changed |= lowerAtomicRMWInst(RMWI);
else if (LoadInst *LI = dyn_cast<LoadInst>(&Inst)) {
if (LI->isAtomic())
LowerLoadInst(LI);

View File

@ -0,0 +1,18 @@
; RUN: opt < %s -S -nvptx-atomic-lower | FileCheck %s
; This test ensures that there is a legal way for ptx to lower atomics
; on local memory. Here, we demonstrate this by lowering them to simple
; load and stores.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; An atomicrmw fadd through a pointer in addrspace(5). The expectations after
; the body require the pass to turn it into a plain load, fadd and store, and
; the function to return the loaded (old) value.
define double @kernel(double addrspace(5)* %ptr, double %val) {
%res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8
ret double %res
; CHECK: %1 = load double, double addrspace(5)* %ptr, align 8
; CHECK-NEXT: %2 = fadd double %1, %val
; CHECK-NEXT: store double %2, double addrspace(5)* %ptr, align 8
; CHECK-NEXT: ret double %1
}