1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

AMDGPU: Fix i64 global cmpxchg

This was using extract_subreg sub0 to extract the low register
of the result instead of sub0_sub1, producing an invalid copy.

There doesn't seem to be a way to use the compound subreg indices
in tablegen since those are generated, so manually select it.

llvm-svn: 272344
This commit is contained in:
Matt Arsenault 2016-06-09 23:42:48 +00:00
parent ce7bee3c77
commit bcec847408
5 changed files with 189 additions and 136 deletions

View File

@ -128,7 +128,7 @@ private:
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
void SelectMUBUFConstant(SDValue Constant,
@ -169,6 +169,7 @@ private:
void SelectS_BFEFromShifts(SDNode *N);
void SelectS_BFE(SDNode *N);
void SelectBRCOND(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@ -545,6 +546,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::BRCOND:
SelectBRCOND(N);
return;
case AMDGPUISD::ATOMIC_CMP_SWAP:
SelectATOMIC_CMP_SWAP(N);
return;
}
SelectCode(N);
@ -1014,9 +1019,11 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
}
if (isLegalMUBUFImmOffset(C1)) {
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
} else if (isUInt<32>(C1->getZExtValue())) {
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
if (isUInt<32>(C1->getZExtValue())) {
// Illegal offset, store it in soffset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
@ -1151,8 +1158,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &GLC) const {
SDValue SLC, TFE;
SDValue &SLC) const {
SDValue GLC, TFE;
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
@ -1488,6 +1495,68 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
return;
}
// Manually select an AMDGPUISD::ATOMIC_CMP_SWAP node to one of the MUBUF
// BUFFER_ATOMIC_CMPSWAP*_RTN instructions.
//
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
//
// The ADDR64 addressing form is tried first when the subtarget reports
// hasAddr64(), then the OFFSET form. If neither addressing mode matches the
// base pointer, selection is deferred to the generated matcher (SelectCode).
// Otherwise value 0 of N is replaced by a subregister extract of the machine
// node's result (sub0 for i32, sub0_sub1 for every other type) and value 1 of
// N by result 1 of the machine node, and N is removed.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;

  // Prefer the addr64 form on subtargets that have it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64
                             : AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      // Operand 2 of the memory node is the data operand of the instruction.
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Otherwise (no addr64, or the addr64 match failed) try the offset form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;

    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET
                             : AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;
      // Operand 2 of the memory node is the data operand of the instruction.
      SDValue CmpVal = Mem->getOperand(2);

      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither MUBUF addressing mode matched; let the generated matcher handle
  // the node.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Carry the original node's memory operand over to the machine node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // Extract the returned value: sub0 for the 32-bit variant, sub0_sub1 for
  // the X2 variant.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {

View File

@ -191,6 +191,10 @@ public:
return FlatForGlobal;
}
/// Returns true when this subtarget's generation is older than
/// VOLCANIC_ISLANDS.
bool hasAddr64() const {
  return getGeneration() < VOLCANIC_ISLANDS;
}
/// Returns true when this subtarget's generation is EVERGREEN or newer.
bool hasBFE() const {
  return getGeneration() >= EVERGREEN;
}

View File

@ -2197,9 +2197,9 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
SDValue NewOld = DAG.getBuildVector(VecType, DL, {New, Old});
SDValue Ops[] = { ChainIn, Addr, NewOld };
SDVTList VTList = DAG.getVTList(VT, MVT::Other);
return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL,
VTList, Ops, VT, AtomicNode->getMemOperand());
return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL, Op->getVTList(),
Ops, VT, AtomicNode->getMemOperand());
}
//===----------------------------------------------------------------------===//

View File

@ -3285,37 +3285,6 @@ def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
multiclass MUBUFCmpSwapPat <Instruction inst_addr64, Instruction inst_offset,
SDPatternOperator node, ValueType data_vt,
ValueType node_vt> {
let Predicates = [isSI] in {
def : Pat <
(node_vt (node (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc), data_vt:$vdata_in)),
(EXTRACT_SUBREG
(inst_addr64 $vdata_in, $vaddr, $srsrc, $soffset, $offset, $slc), sub0)
>;
}
def : Pat <
(node_vt (node (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
i1:$slc), data_vt:$vdata_in)),
(EXTRACT_SUBREG
(inst_offset $vdata_in, $srsrc, $soffset, $offset, $slc), sub0)
>;
}
defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64,
BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET,
atomic_cmp_swap_global, v2i32, i32>;
defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64,
BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET,
atomic_cmp_swap_global, v2i64, i64>;
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//

View File

@ -841,6 +841,113 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
define void @atomic_cmpxchg_i64_offset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x11940
; GCN: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}
define void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64 addrspace(1)* %out, i64 9000
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]:
define void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]:
define void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_cmpxchg_i64(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]:
define void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]:
define void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_load_i64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
@ -928,99 +1035,3 @@ entry:
store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: buffer_atomic_cmpswapx2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_cmpxchg_i64_offset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: buffer_atomic_cmpswapx2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dwordx2 v[[RET]]
define void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; SI: buffer_atomic_cmpswapx2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_cmpswapx2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; SI: buffer_atomic_cmpswapx2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_cmpswapx2 v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: buffer_store_dword v[[RET]]
define void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_cmpxchg_i64(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; GCN: buffer_store_dword v[[RET]]
define void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: buffer_store_dword v[[RET]]
define void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2
ret void
}