mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AMDGPU] Add support for a16 modifiear for gfx9
Summary: Adding support for a16 for gfx9. A16 bit replaces r128 bit for gfx9. Change-Id: Ie8b881e4e6d2f023fb5e0150420893513e5f4841 Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D50575 llvm-svn: 340831
This commit is contained in:
parent
9bb133aa4e
commit
a30bc8842e
@ -242,6 +242,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
|
||||
"Support DPP (Data Parallel Primitives) extension"
|
||||
>;
|
||||
|
||||
def FeatureR128A16 : SubtargetFeature<"r128-a16",
|
||||
"HasR128A16",
|
||||
"true",
|
||||
"Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
|
||||
>;
|
||||
|
||||
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
|
||||
"HasIntClamp",
|
||||
"true",
|
||||
@ -444,7 +450,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
|
||||
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
|
||||
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
|
||||
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
|
||||
FeatureAddNoCarryInsts, FeatureScalarAtomics
|
||||
FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
|
||||
]
|
||||
>;
|
||||
|
||||
@ -703,6 +709,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
|
||||
def HasDPP : Predicate<"Subtarget->hasDPP()">,
|
||||
AssemblerPredicate<"FeatureDPP">;
|
||||
|
||||
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
|
||||
AssemblerPredicate<"FeatureR128A16">;
|
||||
|
||||
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
|
||||
AssemblerPredicate<"FeatureIntClamp">;
|
||||
|
||||
|
@ -197,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasSDWAMac(false),
|
||||
HasSDWAOutModsVOPC(false),
|
||||
HasDPP(false),
|
||||
HasR128A16(false),
|
||||
HasDLInsts(false),
|
||||
D16PreservesUnusedBits(false),
|
||||
FlatAddressSpace(false),
|
||||
|
@ -358,6 +358,7 @@ protected:
|
||||
bool HasSDWAMac;
|
||||
bool HasSDWAOutModsVOPC;
|
||||
bool HasDPP;
|
||||
bool HasR128A16;
|
||||
bool HasDLInsts;
|
||||
bool D16PreservesUnusedBits;
|
||||
bool FlatAddressSpace;
|
||||
@ -791,6 +792,10 @@ public:
|
||||
return HasDPP;
|
||||
}
|
||||
|
||||
bool hasR128A16() const {
|
||||
return HasR128A16;
|
||||
}
|
||||
|
||||
bool enableSIScheduler() const {
|
||||
return EnableSIScheduler;
|
||||
}
|
||||
|
@ -156,7 +156,7 @@ public:
|
||||
ImmTyDMask,
|
||||
ImmTyUNorm,
|
||||
ImmTyDA,
|
||||
ImmTyR128,
|
||||
ImmTyR128A16,
|
||||
ImmTyLWE,
|
||||
ImmTyExpTgt,
|
||||
ImmTyExpCompr,
|
||||
@ -290,7 +290,7 @@ public:
|
||||
bool isDMask() const { return isImmTy(ImmTyDMask); }
|
||||
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
|
||||
bool isDA() const { return isImmTy(ImmTyDA); }
|
||||
bool isR128() const { return isImmTy(ImmTyR128); }
|
||||
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
|
||||
bool isLWE() const { return isImmTy(ImmTyLWE); }
|
||||
bool isOff() const { return isImmTy(ImmTyOff); }
|
||||
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
|
||||
@ -678,7 +678,7 @@ public:
|
||||
case ImmTyDMask: OS << "DMask"; break;
|
||||
case ImmTyUNorm: OS << "UNorm"; break;
|
||||
case ImmTyDA: OS << "DA"; break;
|
||||
case ImmTyR128: OS << "R128"; break;
|
||||
case ImmTyR128A16: OS << "R128A16"; break;
|
||||
case ImmTyLWE: OS << "LWE"; break;
|
||||
case ImmTyOff: OS << "Off"; break;
|
||||
case ImmTyExpTgt: OS << "ExpTgt"; break;
|
||||
@ -1090,7 +1090,6 @@ private:
|
||||
bool validateMIMGAtomicDMask(const MCInst &Inst);
|
||||
bool validateMIMGGatherDMask(const MCInst &Inst);
|
||||
bool validateMIMGDataSize(const MCInst &Inst);
|
||||
bool validateMIMGR128(const MCInst &Inst);
|
||||
bool validateMIMGD16(const MCInst &Inst);
|
||||
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
|
||||
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
|
||||
@ -2445,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
|
||||
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
|
||||
|
||||
const unsigned Opc = Inst.getOpcode();
|
||||
const MCInstrDesc &Desc = MII.get(Opc);
|
||||
|
||||
if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
|
||||
return true;
|
||||
|
||||
int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
|
||||
assert(Idx != -1);
|
||||
|
||||
bool R128 = (Inst.getOperand(Idx).getImm() != 0);
|
||||
|
||||
return !R128 || hasMIMG_R128();
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
|
||||
|
||||
const unsigned Opc = Inst.getOpcode();
|
||||
@ -2495,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
|
||||
"integer clamping is not supported on this GPU");
|
||||
return false;
|
||||
}
|
||||
if (!validateMIMGR128(Inst)) {
|
||||
Error(IDLoc,
|
||||
"r128 modifier is not supported on this GPU");
|
||||
return false;
|
||||
}
|
||||
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
|
||||
if (!validateMIMGD16(Inst)) {
|
||||
Error(IDLoc,
|
||||
@ -3463,6 +3441,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
|
||||
case AsmToken::Identifier: {
|
||||
StringRef Tok = Parser.getTok().getString();
|
||||
if (Tok == Name) {
|
||||
if (Tok == "r128" && isGFX9())
|
||||
Error(S, "r128 modifier is not supported on this GPU");
|
||||
if (Tok == "a16" && !isGFX9())
|
||||
Error(S, "a16 modifier is not supported on this GPU");
|
||||
Bit = 1;
|
||||
Parser.Lex();
|
||||
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
|
||||
@ -4705,7 +4687,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
|
||||
@ -4815,7 +4797,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
|
||||
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
|
||||
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
|
||||
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
|
||||
{"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
|
||||
{"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
|
||||
{"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
|
||||
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
|
||||
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
|
||||
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
|
||||
|
@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
|
||||
printNamedBit(MI, OpNo, O, "da");
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
|
||||
void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O) {
|
||||
printNamedBit(MI, OpNo, O, "r128");
|
||||
if (STI.hasFeature(AMDGPU::FeatureR128A16))
|
||||
printNamedBit(MI, OpNo, O, "a16");
|
||||
else
|
||||
printNamedBit(MI, OpNo, O, "r128");
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
|
||||
|
@ -80,7 +80,7 @@ private:
|
||||
raw_ostream &O);
|
||||
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
raw_ostream &O);
|
||||
void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
raw_ostream &O);
|
||||
void printLWE(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O);
|
||||
|
@ -141,7 +141,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
|
||||
|
||||
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
|
||||
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
|
||||
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
|
||||
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
|
||||
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
|
||||
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
|
||||
#!if(BaseOpcode.HasD16, "$d16", "");
|
||||
@ -199,7 +199,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
|
||||
|
||||
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
|
||||
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
|
||||
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
|
||||
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
|
||||
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
|
||||
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
|
||||
#!if(BaseOpcode.HasD16, "$d16", "");
|
||||
@ -252,7 +252,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
|
||||
|
||||
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
|
||||
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
|
||||
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
|
||||
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
|
||||
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
|
||||
}
|
||||
|
||||
@ -316,7 +316,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
|
||||
|
||||
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
|
||||
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
|
||||
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
|
||||
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
|
||||
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
|
||||
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
|
||||
#!if(BaseOpcode.HasD16, "$d16", "");
|
||||
|
@ -4576,6 +4576,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
const AMDGPU::ImageDimIntrinsicInfo *Intr,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
|
||||
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
|
||||
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
|
||||
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
|
||||
@ -4585,6 +4587,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
|
||||
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
|
||||
bool IsD16 = false;
|
||||
bool IsA16 = false;
|
||||
SDValue VData;
|
||||
int NumVDataDwords;
|
||||
unsigned AddrIdx; // Index of first address argument
|
||||
@ -4660,25 +4663,61 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
}
|
||||
}
|
||||
|
||||
unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
|
||||
(BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
|
||||
(BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
|
||||
(BaseOpcode->LodOrClampOrMip ? 1 : 0);
|
||||
unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
|
||||
unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
|
||||
unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
|
||||
unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
|
||||
NumCoords + NumLCM;
|
||||
unsigned NumMIVAddrs = NumVAddrs;
|
||||
|
||||
SmallVector<SDValue, 4> VAddrs;
|
||||
for (unsigned i = 0; i < NumVAddrs; ++i)
|
||||
VAddrs.push_back(Op.getOperand(AddrIdx + i));
|
||||
|
||||
// Optimize _L to _LZ when _L is zero
|
||||
if (LZMappingInfo) {
|
||||
if (auto ConstantLod =
|
||||
dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
|
||||
dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
|
||||
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
|
||||
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
|
||||
VAddrs.pop_back(); // remove 'lod'
|
||||
NumMIVAddrs--; // remove 'lod'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for 16 bit addresses and pack if true.
|
||||
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
|
||||
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
|
||||
if (VAddrVT.getScalarType() == MVT::f16 &&
|
||||
ST->hasFeature(AMDGPU::FeatureR128A16)) {
|
||||
IsA16 = true;
|
||||
for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
|
||||
SDValue AddrLo, AddrHi;
|
||||
// Push back extra arguments.
|
||||
if (i < DimIdx) {
|
||||
AddrLo = Op.getOperand(i);
|
||||
} else {
|
||||
AddrLo = Op.getOperand(i);
|
||||
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
|
||||
// in 1D, derivatives dx/dh and dx/dv are packed with undef.
|
||||
if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
|
||||
((NumGradients / 2) % 2 == 1 &&
|
||||
(i == DimIdx + (NumGradients / 2) - 1 ||
|
||||
i == DimIdx + NumGradients - 1))) {
|
||||
AddrHi = DAG.getUNDEF(MVT::f16);
|
||||
} else {
|
||||
AddrHi = Op.getOperand(i + 1);
|
||||
i++;
|
||||
}
|
||||
AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16,
|
||||
{AddrLo, AddrHi});
|
||||
AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
|
||||
}
|
||||
VAddrs.push_back(AddrLo);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < NumMIVAddrs; ++i)
|
||||
VAddrs.push_back(Op.getOperand(AddrIdx + i));
|
||||
}
|
||||
|
||||
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
|
||||
|
||||
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
|
||||
@ -4725,7 +4764,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
Ops.push_back(Unorm);
|
||||
Ops.push_back(GLC);
|
||||
Ops.push_back(SLC);
|
||||
Ops.push_back(False); // r128
|
||||
Ops.push_back(IsA16 && // a16 or r128
|
||||
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
|
||||
Ops.push_back(False); // tfe
|
||||
Ops.push_back(False); // lwe
|
||||
Ops.push_back(DimInfo->DA ? True : False);
|
||||
|
@ -754,7 +754,7 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
|
||||
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
|
||||
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
|
||||
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
|
||||
def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
|
||||
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
|
||||
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
|
||||
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
|
||||
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
|
||||
|
136
test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
Normal file
136
test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
Normal file
@ -0,0 +1,136 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_2d:
|
||||
; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_cube:
|
||||
; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_2darray:
|
||||
; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_2d:
|
||||
; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_cl_2d:
|
||||
; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_cl_2d:
|
||||
; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_b_2d:
|
||||
; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_b_2d:
|
||||
; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_b_cl_2d:
|
||||
; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
|
||||
; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_l_2d:
|
||||
; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_l_2d:
|
||||
; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_lz_2d:
|
||||
; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}gather4_c_lz_2d:
|
||||
; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
|
||||
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind readnone }
|
435
test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
Normal file
435
test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
Normal file
@ -0,0 +1,435 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_2d:
|
||||
; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_3d:
|
||||
; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cube:
|
||||
; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_1darray:
|
||||
; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_2darray:
|
||||
; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_1d:
|
||||
; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_2d:
|
||||
; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cl_1d:
|
||||
; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cl_2d:
|
||||
; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cl_1d:
|
||||
; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cl_2d:
|
||||
; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_1d:
|
||||
; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_2d:
|
||||
; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_1d:
|
||||
; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_2d:
|
||||
; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_cl_1d:
|
||||
; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_b_cl_2d:
|
||||
; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_cl_1d:
|
||||
; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_b_cl_2d:
|
||||
; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_1d:
|
||||
; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_2d:
|
||||
; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABAL: {{^}}sample_d_3d:
|
||||
; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_1d:
|
||||
; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_2d:
|
||||
; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_cl_1d:
|
||||
; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_d_cl_2d:
|
||||
; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_cl_1d:
|
||||
; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_cl_2d:
|
||||
; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_1d:
|
||||
; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_2d:
|
||||
; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_1d:
|
||||
; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_2d:
|
||||
; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_cl_1d:
|
||||
; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_cd_cl_2d:
|
||||
; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
|
||||
; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
|
||||
; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_l_1d:
|
||||
; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_l_2d:
|
||||
; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_l_1d:
|
||||
; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_l_2d:
|
||||
; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_lz_1d:
|
||||
; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_lz_2d:
|
||||
; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_lz_1d:
|
||||
; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_lz_2d:
|
||||
; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
|
||||
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
|
||||
; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}
|
||||
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
|
||||
main_body:
|
||||
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret float %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
|
||||
; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}
|
||||
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
|
||||
main_body:
|
||||
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
ret <2 x float> %v
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
attributes #2 = { nounwind readnone }
|
@ -267,6 +267,30 @@ image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16
|
||||
// GFX8_1: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]
|
||||
// GFX9: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Image Sample: a16
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
|
||||
// GFX9: image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x80,0xf0,0xed,0xc1,0x27,0x00]
|
||||
// NOSICI: error: a16 modifier is not supported on this GPU
|
||||
// NOVI: error: a16 modifier is not supported on this GPU
|
||||
|
||||
image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
|
||||
// GFX9: image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x88,0xf0,0xed,0xc1,0x27,0x00]
|
||||
// NOSICI: error: a16 modifier is not supported on this GPU
|
||||
// NOVI: error: a16 modifier is not supported on this GPU
|
||||
|
||||
image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
|
||||
// GFX9: image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xa8,0xf0,0xed,0xc1,0x27,0x00]
|
||||
// NOSICI: error: a16 modifier is not supported on this GPU
|
||||
// NOVI: error: a16 modifier is not supported on this GPU
|
||||
|
||||
image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
|
||||
// GFX9: image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xac,0xf0,0xed,0xc1,0x27,0x00]
|
||||
// NOSICI: error: a16 modifier is not supported on this GPU
|
||||
// NOVI: error: a16 modifier is not supported on this GPU
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Image Atomics
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -372,3 +396,13 @@ image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
|
||||
// NOGFX8_0: error: image data size does not match dmask and tfe
|
||||
// NOGFX8_1: error: image data size does not match dmask and tfe
|
||||
// NOGFX9: error: image data size does not match dmask and tfe
|
||||
|
||||
image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16
|
||||
// GFX9: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x00,0xf1,0x01,0x05,0x62,0x00]
|
||||
// NOSICI: error: a16 modifier is not supported on this GPU
|
||||
// NOVI: error: a16 modifier is not supported on this GPU
|
||||
|
||||
image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16
|
||||
// GFX9: image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x18,0xf1,0x01,0x05,0x62,0x00]
|
||||
// NOSICI: error: a16 modifier is not supported on this GPU
|
||||
// NOVI: error: a16 modifier is not supported on this GPU
|
||||
|
Loading…
x
Reference in New Issue
Block a user