mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Don't assert on a16 images on targets without FeatureR128A16
Currently the lowering for i16 image coordinates asserts on gfx10. I'm somewhat confused by this though. The feature is missing from the gfx10 feature lists, but the a16 bit appears to be present in the manual for MIMG instructions.
This commit is contained in:
parent
b517349a03
commit
84b5307ef3
@ -5460,8 +5460,11 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
|
||||
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
|
||||
const MVT VAddrScalarVT = VAddrVT.getScalarType();
|
||||
if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16)) &&
|
||||
ST->hasFeature(AMDGPU::FeatureR128A16)) {
|
||||
if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
|
||||
// Illegal to use a16 images
|
||||
if (!ST->hasFeature(AMDGPU::FeatureR128A16))
|
||||
return Op;
|
||||
|
||||
IsA16 = true;
|
||||
const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
|
||||
for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
|
||||
|
17
test/CodeGen/AMDGPU/unsupported-image-a16.ll
Normal file
17
test/CodeGen/AMDGPU/unsupported-image-a16.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: not llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
|
||||
; Make sure this doesn't assert on targets without the r128-a16
|
||||
; feature, and instead generates a selection error.
|
||||
|
||||
; ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.load.1d
|
||||
|
||||
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
|
||||
main_body:
|
||||
%s = extractelement <2 x i16> %coords, i32 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #0
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
Loading…
Reference in New Issue
Block a user