From ece3299a717166cd121fde32008144729c412374 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 26 Jul 2021 18:10:28 -0400 Subject: [PATCH] AMDGPU/GlobalISel: Fix selecting G_SEXTLOAD/G_ZEXTLOAD pre-gfx9 The patterns for the m0 glue patterns were failing to import. --- lib/Target/AMDGPU/AMDGPUGISel.td | 2 + .../GlobalISel/inst-select-sextload-local.mir | 120 ++++++++++++++++++ .../GlobalISel/inst-select-zextload-local.mir | 120 ++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index 36e04fc78c3..521c8f261a0 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -128,6 +128,8 @@ def gi_smrd_buffer_imm32 : def : GINodeEquiv<G_LOAD, AMDGPUld_glue> { let CheckMMOIsNonAtomic = 1; + let IfSignExtend = G_SEXTLOAD; + let IfZeroExtend = G_ZEXTLOAD; } def : GINodeEquiv<G_STORE, AMDGPUst_glue> { diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir new file mode 100644 index 00000000000..9a6dea64e59 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir @@ -0,0 +1,120 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s + +--- +name: sextload_local_s32_from_s8_align1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- +name: sextload_local_s32_from_s16_align2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: sextload_local_s32_from_s16_align2 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_I16_]] + ; GFX7-LABEL: name: sextload_local_s32_from_s16_align2 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_I16_]] + ; GFX9-LABEL: name: sextload_local_s32_from_s16_align2 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_I16_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 3) + $vgpr0 = COPY %1 + +... + +# --- +# name: sextload_local_s16_from_s8_align1 +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true + +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:vgpr(p3) = COPY $vgpr0 +# %1:vgpr(s16) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3) +# %2:vgpr(s32) = G_ANYEXT %1 +# $vgpr0 = COPY %2 + +# ... 
+ +--- +name: sextload_local_s32_from_s8_align1_offset4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p3) = G_PTR_ADD %0, %1 + %3:vgpr(s32) = G_SEXTLOAD %2 :: (load (s8), align 1, addrspace 3) + $vgpr0 = COPY %3 + +... 
diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir new file mode 100644 index 00000000000..ed4908cafa3 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir @@ -0,0 +1,120 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s + +--- +name: zextload_local_s32_from_s8_align1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: 
[[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- +name: zextload_local_s32_from_s16_align2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: zextload_local_s32_from_s16_align2 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-LABEL: name: zextload_local_s32_from_s16_align2 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX9-LABEL: name: zextload_local_s32_from_s16_align2 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 3) + $vgpr0 = COPY %1 + +... + +# --- +# name: zextload_local_s16_from_s8_align1 +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true + +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:vgpr(p3) = COPY $vgpr0 +# %1:vgpr(s16) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3) +# %2:vgpr(s32) = G_ANYEXT %1 +# $vgpr0 = COPY %2 + +# ... 
+ +--- +name: zextload_local_s32_from_s8_align1_offset4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p3) = G_PTR_ADD %0, %1 + %3:vgpr(s32) = G_ZEXTLOAD %2 :: (load (s8), align 1, addrspace 3) + $vgpr0 = COPY %3 + +...