From 90731e897ee9cde867dd02768d7883bc5d384b6b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 1 Aug 2019 03:33:15 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Handle G_ATOMICRMW_FADD llvm-svn: 367509 --- lib/Target/AMDGPU/AMDGPUGISel.td | 1 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 1 + lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 + lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + .../inst-select-atomicrmw-fadd-local.mir | 115 ++++++++++++++++++ .../GlobalISel/legalize-atomicrmw-fadd.mir | 17 +++ .../regbankselect-atomicrmw-fadd.mir | 21 ++++ 7 files changed, 159 insertions(+) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index 8f14055180e..aade2e8eb6c 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -87,6 +87,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; class GISelSop2Pat < diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 7dc537c21ba..9fe9b3782c8 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1395,6 +1395,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_ATOMICRMW_MAX: case TargetOpcode::G_ATOMICRMW_UMIN: case TargetOpcode::G_ATOMICRMW_UMAX: + case TargetOpcode::G_ATOMICRMW_FADD: return selectG_LOAD_ATOMICRMW(I, CoverageInfo); case TargetOpcode::G_SELECT: return selectG_SELECT(I); diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index baceab3bec7..aaa51322c9d 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -593,6 +593,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}}); } + getActionDefinitionsBuilder(G_ATOMICRMW_FADD) + .legalFor({{S32, LocalPtr}}); + // TODO: Pointer types, any 32-bit or 64-bit vector getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 7803b68bf33..79950946a06 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2229,6 +2229,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_ATOMICRMW_MIN: case AMDGPU::G_ATOMICRMW_UMAX: case AMDGPU::G_ATOMICRMW_UMIN: + case AMDGPU::G_ATOMICRMW_FADD: case AMDGPU::G_ATOMIC_CMPXCHG: { return getDefaultMappingAllVGPR(MI); } diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir new file mode 100644 index 00000000000..d1862a18b4c --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir @@ -0,0 +1,115 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# XUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + + +--- +name: atomicrmw_fadd_s32_local +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomicrmw_fadd_s32_local + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX7-LABEL: name: atomicrmw_fadd_s32_local + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX9-LABEL: name: atomicrmw_fadd_s32_local + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3) + $vgpr0 = COPY %2 + +... + +--- +name: atomicrmw_fadd_s32_local_noret +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7-LABEL: name: atomicrmw_fadd_s32_local_noret + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3) + +... + +--- +name: atomicrmw_fadd_s32_local_gep4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + ; GFX6: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX7-LABEL: name: atomicrmw_fadd_s32_local_gep4 + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_gep4 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_CONSTANT i32 4 + %3:vgpr(p3) = G_GEP %0, %2 + %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p3), %1 :: (load store seq_cst 4, addrspace 3) + $vgpr0 = COPY %4 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir new file mode 100644 index 00000000000..8b01cd4ec72 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir @@ -0,0 +1,17 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: atomicrmw_fadd_local_i32 + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: atomicrmw_fadd_local_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 3) +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir new file mode 100644 index 00000000000..842e7d71e28 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir @@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: atomicrmw_fadd_local_i32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: atomicrmw_fadd_local_i32_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 3) +...