AMDGPU: Don't assert on a16 images on targets without FeatureR128A16

Currently the lowering for i16 image coordinates asserts on gfx10. I'm somewhat confused by this though. The feature is missing from the gfx10 feature lists, but the a16 bit appears to be present in the manual for MIMG instructions.
2024-11-23 03:02:36 +01:00 · 2020-01-16 15:44:36 -05:00 · 2020-01-16 15:44:36 -05:00 · 84b5307ef3
commit 84b5307ef3
parent b517349a03
2 changed files with 22 additions and 2 deletions
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -5460,8 +5460,11 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
  unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
  MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
  const MVT VAddrScalarVT = VAddrVT.getScalarType();
-  if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16)) &&
-      ST->hasFeature(AMDGPU::FeatureR128A16)) {
+  if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
+    // Illegal to use a16 images
+    if (!ST->hasFeature(AMDGPU::FeatureR128A16))
+      return Op;
+
    IsA16 = true;
    const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
    for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
--- a/test/CodeGen/AMDGPU/unsupported-image-a16.ll
+++ b/test/CodeGen/AMDGPU/unsupported-image-a16.ll
@ -0,0 +1,17 @@
+; RUN: not llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+; Make sure this doesn't assert on targets without the r128-16
+; feature, and instead generates a slection error.
+
+; ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.load.1d
+
+define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+
+attributes #0 = { nounwind readonly }