AMDGPU/SI: Add d16 support for image intrinsics.
Summary: This patch implements d16 support for image load, image store and image sample intrinsics.

Reviewers: Matt, Brian.

Differential Revision: https://reviews.llvm.org/D3991

llvm-svn: 322903
parent 2cb6bd8f84
commit 48df1ecec5
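
To illustrate what the patch enables, here is a minimal IR sketch (the function name and the exact intrinsic mangling are illustrative assumptions, not taken from the patch itself): a sample whose result type is half-precision can now be selected to a d16 MIMG instruction, using the packed "_D16" instruction variants on subtargets with HasPackedD16VMem, or the "_D16_gfx80" variants on subtargets with HasUnpackedD16VMem.

; Hypothetical example of a d16 image sample enabled by this patch.
define amdgpu_ps <4 x half> @sample_d16(<4 x float> %coord, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp) {
main_body:
  ; Arguments after the sampler are dmask (0xf = all four components),
  ; followed by the unorm, glc, slc, lwe and da flags.
  %tex = call <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float> %coord, <8 x i32> %rsrc, <4 x i32> %samp, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
  ret <4 x half> %tex
}

declare <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
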
@@ -4002,6 +4002,83 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_OR)
NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
NODE_NAME_CASE(IMAGE_LOAD)
NODE_NAME_CASE(IMAGE_LOAD_MIP)
NODE_NAME_CASE(IMAGE_STORE)
NODE_NAME_CASE(IMAGE_STORE_MIP)
// Basic sample.
NODE_NAME_CASE(IMAGE_SAMPLE)
NODE_NAME_CASE(IMAGE_SAMPLE_CL)
NODE_NAME_CASE(IMAGE_SAMPLE_D)
NODE_NAME_CASE(IMAGE_SAMPLE_D_CL)
NODE_NAME_CASE(IMAGE_SAMPLE_L)
NODE_NAME_CASE(IMAGE_SAMPLE_B)
NODE_NAME_CASE(IMAGE_SAMPLE_B_CL)
NODE_NAME_CASE(IMAGE_SAMPLE_LZ)
NODE_NAME_CASE(IMAGE_SAMPLE_CD)
NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL)
// Sample with comparison.
NODE_NAME_CASE(IMAGE_SAMPLE_C)
NODE_NAME_CASE(IMAGE_SAMPLE_C_CL)
NODE_NAME_CASE(IMAGE_SAMPLE_C_D)
NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL)
NODE_NAME_CASE(IMAGE_SAMPLE_C_L)
NODE_NAME_CASE(IMAGE_SAMPLE_C_B)
NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL)
NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ)
NODE_NAME_CASE(IMAGE_SAMPLE_C_CD)
NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL)
// Sample with offsets.
NODE_NAME_CASE(IMAGE_SAMPLE_O)
NODE_NAME_CASE(IMAGE_SAMPLE_CL_O)
NODE_NAME_CASE(IMAGE_SAMPLE_D_O)
NODE_NAME_CASE(IMAGE_SAMPLE_D_CL_O)
NODE_NAME_CASE(IMAGE_SAMPLE_L_O)
NODE_NAME_CASE(IMAGE_SAMPLE_B_O)
NODE_NAME_CASE(IMAGE_SAMPLE_B_CL_O)
NODE_NAME_CASE(IMAGE_SAMPLE_LZ_O)
NODE_NAME_CASE(IMAGE_SAMPLE_CD_O)
NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL_O)
// Sample with comparison and offsets.
NODE_NAME_CASE(IMAGE_SAMPLE_C_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_CL_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_D_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_L_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_B_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_O)
NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL_O)
// Basic gather4.
NODE_NAME_CASE(IMAGE_GATHER4)
NODE_NAME_CASE(IMAGE_GATHER4_CL)
NODE_NAME_CASE(IMAGE_GATHER4_L)
NODE_NAME_CASE(IMAGE_GATHER4_B)
NODE_NAME_CASE(IMAGE_GATHER4_B_CL)
NODE_NAME_CASE(IMAGE_GATHER4_LZ)
// Gather4 with comparison.
NODE_NAME_CASE(IMAGE_GATHER4_C)
NODE_NAME_CASE(IMAGE_GATHER4_C_CL)
NODE_NAME_CASE(IMAGE_GATHER4_C_L)
NODE_NAME_CASE(IMAGE_GATHER4_C_B)
NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL)
NODE_NAME_CASE(IMAGE_GATHER4_C_LZ)
// Gather4 with offsets.
NODE_NAME_CASE(IMAGE_GATHER4_O)
NODE_NAME_CASE(IMAGE_GATHER4_CL_O)
NODE_NAME_CASE(IMAGE_GATHER4_L_O)
NODE_NAME_CASE(IMAGE_GATHER4_B_O)
NODE_NAME_CASE(IMAGE_GATHER4_B_CL_O)
NODE_NAME_CASE(IMAGE_GATHER4_LZ_O)
// Gather4 with comparison and offsets.
NODE_NAME_CASE(IMAGE_GATHER4_C_O)
NODE_NAME_CASE(IMAGE_GATHER4_C_CL_O)
NODE_NAME_CASE(IMAGE_GATHER4_C_L_O)
NODE_NAME_CASE(IMAGE_GATHER4_C_B_O)
NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL_O)
NODE_NAME_CASE(IMAGE_GATHER4_C_LZ_O)

case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
return nullptr;
@@ -477,6 +477,91 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_OR,
BUFFER_ATOMIC_XOR,
BUFFER_ATOMIC_CMPSWAP,
IMAGE_LOAD,
IMAGE_LOAD_MIP,
IMAGE_STORE,
IMAGE_STORE_MIP,

// Basic sample.
IMAGE_SAMPLE,
IMAGE_SAMPLE_CL,
IMAGE_SAMPLE_D,
IMAGE_SAMPLE_D_CL,
IMAGE_SAMPLE_L,
IMAGE_SAMPLE_B,
IMAGE_SAMPLE_B_CL,
IMAGE_SAMPLE_LZ,
IMAGE_SAMPLE_CD,
IMAGE_SAMPLE_CD_CL,

// Sample with comparison.
IMAGE_SAMPLE_C,
IMAGE_SAMPLE_C_CL,
IMAGE_SAMPLE_C_D,
IMAGE_SAMPLE_C_D_CL,
IMAGE_SAMPLE_C_L,
IMAGE_SAMPLE_C_B,
IMAGE_SAMPLE_C_B_CL,
IMAGE_SAMPLE_C_LZ,
IMAGE_SAMPLE_C_CD,
IMAGE_SAMPLE_C_CD_CL,

// Sample with offsets.
IMAGE_SAMPLE_O,
IMAGE_SAMPLE_CL_O,
IMAGE_SAMPLE_D_O,
IMAGE_SAMPLE_D_CL_O,
IMAGE_SAMPLE_L_O,
IMAGE_SAMPLE_B_O,
IMAGE_SAMPLE_B_CL_O,
IMAGE_SAMPLE_LZ_O,
IMAGE_SAMPLE_CD_O,
IMAGE_SAMPLE_CD_CL_O,

// Sample with comparison and offsets.
IMAGE_SAMPLE_C_O,
IMAGE_SAMPLE_C_CL_O,
IMAGE_SAMPLE_C_D_O,
IMAGE_SAMPLE_C_D_CL_O,
IMAGE_SAMPLE_C_L_O,
IMAGE_SAMPLE_C_B_O,
IMAGE_SAMPLE_C_B_CL_O,
IMAGE_SAMPLE_C_LZ_O,
IMAGE_SAMPLE_C_CD_O,
IMAGE_SAMPLE_C_CD_CL_O,

// Basic gather4.
IMAGE_GATHER4,
IMAGE_GATHER4_CL,
IMAGE_GATHER4_L,
IMAGE_GATHER4_B,
IMAGE_GATHER4_B_CL,
IMAGE_GATHER4_LZ,

// Gather4 with comparison.
IMAGE_GATHER4_C,
IMAGE_GATHER4_C_CL,
IMAGE_GATHER4_C_L,
IMAGE_GATHER4_C_B,
IMAGE_GATHER4_C_B_CL,
IMAGE_GATHER4_C_LZ,

// Gather4 with offsets.
IMAGE_GATHER4_O,
IMAGE_GATHER4_CL_O,
IMAGE_GATHER4_L_O,
IMAGE_GATHER4_B_O,
IMAGE_GATHER4_B_CL_O,
IMAGE_GATHER4_LZ_O,

// Gather4 with comparison and offsets.
IMAGE_GATHER4_C_O,
IMAGE_GATHER4_C_CL_O,
IMAGE_GATHER4_C_L_O,
IMAGE_GATHER4_C_B_O,
IMAGE_GATHER4_C_B_CL_O,
IMAGE_GATHER4_C_LZ_O,

LAST_AMDGPU_ISD_NUMBER
};
@@ -32,26 +32,45 @@ class MIMG_Helper <dag outs, dag ins, string asm,
class MIMG_NoSampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
bit d16_bit=0,
string dns=""> : MIMG_Helper <
(outs dst_rc:$vdata),
(ins addr_rc:$vaddr, SReg_256:$srsrc,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da",
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
dns>, MIMGe<op> {
let ssamp = 0;
let D16 = d16;
}

multiclass MIMG_NoSampler_Src_Helper_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels, bit d16_bit,
string suffix> {
def _V1 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, d16_bit,
!if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1"#suffix, channels>;
def _V2 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, d16_bit>,
MIMG_Mask<asm#"_V2"#suffix, channels>;
def _V4 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, d16_bit>,
MIMG_Mask<asm#"_V4"#suffix, channels>;
}

multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels> {
def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
!if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
MIMG_Mask<asm#"_V4", channels>;
defm : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 0, "">;

let d16 = 1 in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16">;
} // End HasPackedD16VMem.

let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
defm : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16_gfx80">;
} // End HasUnpackedD16VMem.
} // End d16 = 1.
}

multiclass MIMG_NoSampler <bits<7> op, string asm> {
@@ -64,30 +83,49 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> {
class MIMG_Store_Helper <bits<7> op, string asm,
RegisterClass data_rc,
RegisterClass addr_rc,
bit d16_bit=0,
string dns = ""> : MIMG_Helper <
(outs),
(ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe<op> {
let ssamp = 0;
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let hasPostISelHook = 0;
let DisableWQM = 1;
let D16 = d16;
}

multiclass MIMG_Store_Addr_Helper_Helper <bits<7> op, string asm,
RegisterClass data_rc,
int channels, bit d16_bit,
string suffix> {
def _V1 # suffix : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, d16_bit,
!if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1"#suffix, channels>;
def _V2 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_64, d16_bit>,
MIMG_Mask<asm#"_V2"#suffix, channels>;
def _V4 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_128, d16_bit>,
MIMG_Mask<asm#"_V4"#suffix, channels>;
}

multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
RegisterClass data_rc,
int channels> {
def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
!if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>,
MIMG_Mask<asm#"_V4", channels>;
defm : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 0, "">;

let d16 = 1 in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16">;
} // End HasPackedD16VMem.

let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
defm : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16_gfx80">;
} // End HasUnpackedD16VMem.
} // End d16 = 1.
}

multiclass MIMG_Store <bits<7> op, string asm> {
@@ -159,30 +197,49 @@ class MIMG_Sampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass src_rc,
bit wqm,
bit d16_bit=0,
string dns=""> : MIMG_Helper <
(outs dst_rc:$vdata),
(ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da",
asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
dns>, MIMGe<op> {
let WQM = wqm;
let D16 = d16;
}

multiclass MIMG_Sampler_Src_Helper_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels, bit wqm,
bit d16_bit, string suffix> {
def _V1 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit,
!if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1"#suffix, channels>;
def _V2 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>,
MIMG_Mask<asm#"_V2"#suffix, channels>;
def _V4 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>,
MIMG_Mask<asm#"_V4"#suffix, channels>;
def _V8 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>,
MIMG_Mask<asm#"_V8"#suffix, channels>;
def _V16 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>,
MIMG_Mask<asm#"_V16"#suffix, channels>;
}

multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels, bit wqm> {
def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm,
!if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
MIMG_Mask<asm#"_V2", channels>;
def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
MIMG_Mask<asm#"_V4", channels>;
def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
MIMG_Mask<asm#"_V8", channels>;
def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
MIMG_Mask<asm#"_V16", channels>;
defm : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 0, "">;

let d16 = 1 in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16">;
} // End HasPackedD16VMem.

let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
defm : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">;
} // End HasUnpackedD16VMem.
} // End d16 = 1.
}

multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> {
@@ -196,12 +253,12 @@ multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler<op, asm, 1>;

class MIMG_Gather_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass src_rc, bit wqm> : MIMG <
RegisterClass src_rc, bit wqm, bit d16_bit=0> : MIMG <
(outs dst_rc:$vdata),
(ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da",
asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
[]>, MIMGe<op> {
let mayLoad = 1;
let mayStore = 0;
@@ -216,23 +273,42 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
let Gather4 = 1;
let hasPostISelHook = 0;
let WQM = wqm;
let D16 = d16;

let isAsmParserOnly = 1; // TBD: fix it later
}

multiclass MIMG_Gather_Src_Helper_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels, bit wqm,
bit d16_bit, string suffix> {
def _V1 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit>,
MIMG_Mask<asm#"_V1"#suffix, channels>;
def _V2 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>,
MIMG_Mask<asm#"_V2"#suffix, channels>;
def _V4 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>,
MIMG_Mask<asm#"_V4"#suffix, channels>;
def _V8 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>,
MIMG_Mask<asm#"_V8"#suffix, channels>;
def _V16 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>,
MIMG_Mask<asm#"_V16"#suffix, channels>;
}

multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
int channels, bit wqm> {
def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
MIMG_Mask<asm#"_V2", channels>;
def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
MIMG_Mask<asm#"_V4", channels>;
def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
MIMG_Mask<asm#"_V8", channels>;
def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
MIMG_Mask<asm#"_V16", channels>;
defm : MIMG_Gather_Src_Helper_Helper<op, asm, dst_rc, channels, wqm, 0, "">;

let d16 = 1 in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MIMG_Gather_Src_Helper_Helper<op, asm, dst_rc, channels, wqm, 1, "_D16">;
} // End HasPackedD16VMem.

let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
defm : MIMG_Gather_Src_Helper_Helper<op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">;
} // End HasUnpackedD16VMem.
} // End d16 = 1.
}

multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> {
@@ -357,29 +433,11 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
/********** Image sampling patterns **********/
/********** ======================= **********/

// Image + sampler
class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GCNPat <
(name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
(opcode $addr, $rsrc, $sampler,
(as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
>;

multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
}

// Image + sampler for amdgcn
// ImageSample for amdgcn
// TODO:
// 1. Handle half data type like v4f16, and add D16 bit support;
// 2. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
// 3. Add A16 support when we pass address of half type.
multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
// 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
// 2. Add A16 support when we pass address of half type.
multiclass ImageSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
def : GCNPat<
(dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
i1:$slc, i1:$lwe, i1:$da)),
@@ -389,36 +447,44 @@ multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt
>;
}

multiclass AMDGCNSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V1), dt, f32>;
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V2), dt, v2f32>;
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4), dt, v4f32>;
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V8), dt, v8f32>;
defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V16), dt, v16f32>;
multiclass ImageSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, f32>;
defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2f32>;
defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4f32>;
defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V8 # suffix), dt, v8f32>;
defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V16 # suffix), dt, v16f32>;
}

// TODO: support v3f32.
multiclass AMDGCNSamplePatterns<SDPatternOperator name, string opcode> {
defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>;
defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
// ImageSample patterns.
multiclass ImageSamplePatterns<SDPatternOperator name, string opcode> {
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>;
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;

let SubtargetPredicate = HasUnpackedD16VMem in {
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
} // End HasPackedD16VMem.
}

// Image only
class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GCNPat <
(name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm,
imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe),
(opcode $addr, $rsrc,
(as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
>;
// ImageSample alternative patterns for illegal vector half types.
multiclass ImageSampleAltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
} // End HasUnpackedD16VMem.

multiclass ImagePatterns<SDPatternOperator name, string opcode> {
def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
let SubtargetPredicate = HasPackedD16VMem in {
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">;
defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
} // End HasPackedD16VMem.
}

// ImageLoad for amdgcn.
multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
def : GCNPat <
(dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
@@ -429,19 +495,43 @@ multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, V
>;
}

multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1), dt, i32>;
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>;
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>;
multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
}

// ImageLoad patterns.
// TODO: support v3f32.
multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32>;
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;

let SubtargetPredicate = HasUnpackedD16VMem in {
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
} // End HasPackedD16VMem.
}

// ImageLoad alternative patterns for illegal vector half types.
multiclass ImageLoadAltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">;
defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
} // End HasPackedD16VMem.
}

// ImageStore for amdgcn.
multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
def : GCNPat <
(name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
@@ -452,30 +542,56 @@ multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt,
>;
}

multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1), dt, i32>;
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>;
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>;
multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
}

// ImageStore patterns.
// TODO: support v3f32.
multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32>;
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;

let SubtargetPredicate = HasUnpackedD16VMem in {
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
} // End HasPackedD16VMem.
}

// ImageStore alternative patterns.
multiclass ImageStoreAltPatterns<SDPatternOperator name, string opcode> {
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">;
defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
} // End HasPackedD16VMem.
}

// ImageAtomic for amdgcn.
class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : GCNPat <
(name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc),
(opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da))
>;

// ImageAtomic patterns.
multiclass ImageAtomicPatterns<SDPatternOperator name, string opcode> {
def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1), i32>;
def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V2), v2i32>;
def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V4), v4i32>;
}

// ImageAtomicCmpSwap for amdgcn.
class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : GCNPat <
(int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc,
imm:$r128, imm:$da, imm:$slc),
@@ -487,93 +603,180 @@ class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : GCNPat <

// ======= amdgcn Image Intrinsics ==============

// Image load
// Image load.
defm : ImageLoadPatterns<int_amdgcn_image_load, "IMAGE_LOAD">;
defm : ImageLoadPatterns<int_amdgcn_image_load_mip, "IMAGE_LOAD_MIP">;
defm : ImageLoadPatterns<int_amdgcn_image_getresinfo, "IMAGE_GET_RESINFO">;
defm : ImageLoadAltPatterns<SIImage_load, "IMAGE_LOAD">;
defm : ImageLoadAltPatterns<SIImage_load_mip, "IMAGE_LOAD_MIP">;

// Image store
defm : ImageStorePatterns<int_amdgcn_image_store, "IMAGE_STORE">;
defm : ImageStorePatterns<int_amdgcn_image_store_mip, "IMAGE_STORE_MIP">;
// Image store.
defm : ImageStorePatterns<SIImage_store, "IMAGE_STORE">;
defm : ImageStorePatterns<SIImage_store_mip, "IMAGE_STORE_MIP">;
defm : ImageStoreAltPatterns<SIImage_store, "IMAGE_STORE">;
defm : ImageStoreAltPatterns<SIImage_store_mip, "IMAGE_STORE_MIP">;

// Basic sample
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample, "IMAGE_SAMPLE">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cl, "IMAGE_SAMPLE_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d, "IMAGE_SAMPLE_D">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_l, "IMAGE_SAMPLE_L">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b, "IMAGE_SAMPLE_B">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_lz, "IMAGE_SAMPLE_LZ">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd, "IMAGE_SAMPLE_CD">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
// Basic sample.
defm : ImageSamplePatterns<int_amdgcn_image_sample, "IMAGE_SAMPLE">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_cl, "IMAGE_SAMPLE_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_d, "IMAGE_SAMPLE_D">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_l, "IMAGE_SAMPLE_L">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_b, "IMAGE_SAMPLE_B">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_lz, "IMAGE_SAMPLE_LZ">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_cd, "IMAGE_SAMPLE_CD">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;

// Sample with comparison
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c, "IMAGE_SAMPLE_C">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
// Sample with comparison.
defm : ImageSamplePatterns<int_amdgcn_image_sample_c, "IMAGE_SAMPLE_C">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;

// Sample with offsets
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_o, "IMAGE_SAMPLE_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
// Sample with offsets.
defm : ImageSamplePatterns<int_amdgcn_image_sample_o, "IMAGE_SAMPLE_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;

// Sample with comparison and offsets
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
// Sample with comparison and offsets.
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
defm : ImageSamplePatterns<int_amdgcn_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;

// Gather opcodes
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4, "IMAGE_GATHER4">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl, "IMAGE_GATHER4_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l, "IMAGE_GATHER4_L">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b, "IMAGE_GATHER4_B">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl, "IMAGE_GATHER4_B_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz, "IMAGE_GATHER4_LZ">;
// Basic gather4.
defm : ImageSamplePatterns<int_amdgcn_image_gather4, "IMAGE_GATHER4">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_cl, "IMAGE_GATHER4_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_l, "IMAGE_GATHER4_L">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_b, "IMAGE_GATHER4_B">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_b_cl, "IMAGE_GATHER4_B_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_lz, "IMAGE_GATHER4_LZ">;

defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c, "IMAGE_GATHER4_C">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl, "IMAGE_GATHER4_C_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l, "IMAGE_GATHER4_C_L">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b, "IMAGE_GATHER4_C_B">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz, "IMAGE_GATHER4_C_LZ">;
// Gather4 with comparison.
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c, "IMAGE_GATHER4_C">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_cl, "IMAGE_GATHER4_C_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_l, "IMAGE_GATHER4_C_L">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b, "IMAGE_GATHER4_C_B">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_lz, "IMAGE_GATHER4_C_LZ">;

defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_o, "IMAGE_GATHER4_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl_o, "IMAGE_GATHER4_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l_o, "IMAGE_GATHER4_L_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_o, "IMAGE_GATHER4_B_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz_o, "IMAGE_GATHER4_LZ_O">;
// Gather4 with offsets.
defm : ImageSamplePatterns<int_amdgcn_image_gather4_o, "IMAGE_GATHER4_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_cl_o, "IMAGE_GATHER4_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_l_o, "IMAGE_GATHER4_L_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_b_o, "IMAGE_GATHER4_B_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_lz_o, "IMAGE_GATHER4_LZ_O">;

defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_o, "IMAGE_GATHER4_C_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">;
defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">;
// Gather4 with comparison and offsets.
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_o, "IMAGE_GATHER4_C_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">;
defm : ImageSamplePatterns<int_amdgcn_image_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">;

defm : AMDGCNSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">;
// Basic sample alternative.
defm : ImageSampleAltPatterns<SIImage_sample, "IMAGE_SAMPLE">;
defm : ImageSampleAltPatterns<SIImage_sample_cl, "IMAGE_SAMPLE_CL">;
defm : ImageSampleAltPatterns<SIImage_sample_d, "IMAGE_SAMPLE_D">;
defm : ImageSampleAltPatterns<SIImage_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
defm : ImageSampleAltPatterns<SIImage_sample_l, "IMAGE_SAMPLE_L">;
defm : ImageSampleAltPatterns<SIImage_sample_b, "IMAGE_SAMPLE_B">;
defm : ImageSampleAltPatterns<SIImage_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
defm : ImageSampleAltPatterns<SIImage_sample_lz, "IMAGE_SAMPLE_LZ">;
defm : ImageSampleAltPatterns<SIImage_sample_cd, "IMAGE_SAMPLE_CD">;
defm : ImageSampleAltPatterns<SIImage_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;

// Sample with comparison alternative.
defm : ImageSampleAltPatterns<SIImage_sample_c, "IMAGE_SAMPLE_C">;
defm : ImageSampleAltPatterns<SIImage_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
defm : ImageSampleAltPatterns<SIImage_sample_c_d, "IMAGE_SAMPLE_C_D">;
defm : ImageSampleAltPatterns<SIImage_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
defm : ImageSampleAltPatterns<SIImage_sample_c_l, "IMAGE_SAMPLE_C_L">;
defm : ImageSampleAltPatterns<SIImage_sample_c_b, "IMAGE_SAMPLE_C_B">;
defm : ImageSampleAltPatterns<SIImage_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
defm : ImageSampleAltPatterns<SIImage_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
defm : ImageSampleAltPatterns<SIImage_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
defm : ImageSampleAltPatterns<SIImage_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;

// Sample with offsets alternative.
defm : ImageSampleAltPatterns<SIImage_sample_o, "IMAGE_SAMPLE_O">;
defm : ImageSampleAltPatterns<SIImage_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
defm : ImageSampleAltPatterns<SIImage_sample_d_o, "IMAGE_SAMPLE_D_O">;
defm : ImageSampleAltPatterns<SIImage_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
defm : ImageSampleAltPatterns<SIImage_sample_l_o, "IMAGE_SAMPLE_L_O">;
defm : ImageSampleAltPatterns<SIImage_sample_b_o, "IMAGE_SAMPLE_B_O">;
defm : ImageSampleAltPatterns<SIImage_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
defm : ImageSampleAltPatterns<SIImage_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
defm : ImageSampleAltPatterns<SIImage_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
defm : ImageSampleAltPatterns<SIImage_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;

// Sample with comparison and offsets alternative.
defm : ImageSampleAltPatterns<SIImage_sample_c_o, "IMAGE_SAMPLE_C_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
defm : ImageSampleAltPatterns<SIImage_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;

// Basic gather4 alternative.
defm : ImageSampleAltPatterns<SIImage_gather4, "IMAGE_GATHER4">;
defm : ImageSampleAltPatterns<SIImage_gather4_cl, "IMAGE_GATHER4_CL">;
defm : ImageSampleAltPatterns<SIImage_gather4_l, "IMAGE_GATHER4_L">;
defm : ImageSampleAltPatterns<SIImage_gather4_b, "IMAGE_GATHER4_B">;
defm : ImageSampleAltPatterns<SIImage_gather4_b_cl, "IMAGE_GATHER4_B_CL">;
defm : ImageSampleAltPatterns<SIImage_gather4_lz, "IMAGE_GATHER4_LZ">;

// Gather4 with comparison alternative.
defm : ImageSampleAltPatterns<SIImage_gather4_c, "IMAGE_GATHER4_C">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_cl, "IMAGE_GATHER4_C_CL">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_l, "IMAGE_GATHER4_C_L">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_b, "IMAGE_GATHER4_C_B">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_lz, "IMAGE_GATHER4_C_LZ">;

// Gather4 with offsets alternative.
defm : ImageSampleAltPatterns<SIImage_gather4_o, "IMAGE_GATHER4_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_cl_o, "IMAGE_GATHER4_CL_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_l_o, "IMAGE_GATHER4_L_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_b_o, "IMAGE_GATHER4_B_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_lz_o, "IMAGE_GATHER4_LZ_O">;

// Gather4 with comparison and offsets alternative.
defm : ImageSampleAltPatterns<SIImage_gather4_c_o, "IMAGE_GATHER4_C_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">;
defm : ImageSampleAltPatterns<SIImage_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">;

defm : ImageSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">;

// Image atomics
defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">;
@@ -85,7 +85,10 @@ enum : uint64_t {
ClampHi = UINT64_C(1) << 48,

// Is a packed VOP3P instruction.
IsPacked = UINT64_C(1) << 49
IsPacked = UINT64_C(1) << 49,

// "d16" bit set or not.
D16 = UINT64_C(1) << 50
};

// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -3510,6 +3510,163 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}

static unsigned getImageOpcode(unsigned IID) {
switch (IID) {
case Intrinsic::amdgcn_image_load:
return AMDGPUISD::IMAGE_LOAD;
case Intrinsic::amdgcn_image_load_mip:
return AMDGPUISD::IMAGE_LOAD_MIP;

// Basic sample.
case Intrinsic::amdgcn_image_sample:
return AMDGPUISD::IMAGE_SAMPLE;
case Intrinsic::amdgcn_image_sample_cl:
return AMDGPUISD::IMAGE_SAMPLE_CL;
case Intrinsic::amdgcn_image_sample_d:
return AMDGPUISD::IMAGE_SAMPLE_D;
case Intrinsic::amdgcn_image_sample_d_cl:
return AMDGPUISD::IMAGE_SAMPLE_D_CL;
case Intrinsic::amdgcn_image_sample_l:
return AMDGPUISD::IMAGE_SAMPLE_L;
case Intrinsic::amdgcn_image_sample_b:
return AMDGPUISD::IMAGE_SAMPLE_B;
case Intrinsic::amdgcn_image_sample_b_cl:
return AMDGPUISD::IMAGE_SAMPLE_B_CL;
case Intrinsic::amdgcn_image_sample_lz:
return AMDGPUISD::IMAGE_SAMPLE_LZ;
case Intrinsic::amdgcn_image_sample_cd:
return AMDGPUISD::IMAGE_SAMPLE_CD;
case Intrinsic::amdgcn_image_sample_cd_cl:
return AMDGPUISD::IMAGE_SAMPLE_CD_CL;

// Sample with comparison.
case Intrinsic::amdgcn_image_sample_c:
return AMDGPUISD::IMAGE_SAMPLE_C;
case Intrinsic::amdgcn_image_sample_c_cl:
return AMDGPUISD::IMAGE_SAMPLE_C_CL;
case Intrinsic::amdgcn_image_sample_c_d:
return AMDGPUISD::IMAGE_SAMPLE_C_D;
case Intrinsic::amdgcn_image_sample_c_d_cl:
return AMDGPUISD::IMAGE_SAMPLE_C_D_CL;
case Intrinsic::amdgcn_image_sample_c_l:
return AMDGPUISD::IMAGE_SAMPLE_C_L;
case Intrinsic::amdgcn_image_sample_c_b:
return AMDGPUISD::IMAGE_SAMPLE_C_B;
case Intrinsic::amdgcn_image_sample_c_b_cl:
return AMDGPUISD::IMAGE_SAMPLE_C_B_CL;
case Intrinsic::amdgcn_image_sample_c_lz:
return AMDGPUISD::IMAGE_SAMPLE_C_LZ;
case Intrinsic::amdgcn_image_sample_c_cd:
return AMDGPUISD::IMAGE_SAMPLE_C_CD;
case Intrinsic::amdgcn_image_sample_c_cd_cl:
return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL;

// Sample with offsets.
case Intrinsic::amdgcn_image_sample_o:
return AMDGPUISD::IMAGE_SAMPLE_O;
case Intrinsic::amdgcn_image_sample_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_CL_O;
case Intrinsic::amdgcn_image_sample_d_o:
return AMDGPUISD::IMAGE_SAMPLE_D_O;
case Intrinsic::amdgcn_image_sample_d_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_D_CL_O;
case Intrinsic::amdgcn_image_sample_l_o:
return AMDGPUISD::IMAGE_SAMPLE_L_O;
case Intrinsic::amdgcn_image_sample_b_o:
return AMDGPUISD::IMAGE_SAMPLE_B_O;
case Intrinsic::amdgcn_image_sample_b_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_B_CL_O;
case Intrinsic::amdgcn_image_sample_lz_o:
return AMDGPUISD::IMAGE_SAMPLE_LZ_O;
case Intrinsic::amdgcn_image_sample_cd_o:
return AMDGPUISD::IMAGE_SAMPLE_CD_O;
case Intrinsic::amdgcn_image_sample_cd_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_CD_CL_O;

// Sample with comparison and offsets.
case Intrinsic::amdgcn_image_sample_c_o:
return AMDGPUISD::IMAGE_SAMPLE_C_O;
case Intrinsic::amdgcn_image_sample_c_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_C_CL_O;
case Intrinsic::amdgcn_image_sample_c_d_o:
return AMDGPUISD::IMAGE_SAMPLE_C_D_O;
case Intrinsic::amdgcn_image_sample_c_d_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O;
case Intrinsic::amdgcn_image_sample_c_l_o:
return AMDGPUISD::IMAGE_SAMPLE_C_L_O;
case Intrinsic::amdgcn_image_sample_c_b_o:
return AMDGPUISD::IMAGE_SAMPLE_C_B_O;
case Intrinsic::amdgcn_image_sample_c_b_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O;
case Intrinsic::amdgcn_image_sample_c_lz_o:
return AMDGPUISD::IMAGE_SAMPLE_C_LZ_O;
case Intrinsic::amdgcn_image_sample_c_cd_o:
return AMDGPUISD::IMAGE_SAMPLE_C_CD_O;
case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O;

// Basic gather4.
case Intrinsic::amdgcn_image_gather4:
return AMDGPUISD::IMAGE_GATHER4;
case Intrinsic::amdgcn_image_gather4_cl:
return AMDGPUISD::IMAGE_GATHER4_CL;
case Intrinsic::amdgcn_image_gather4_l:
return AMDGPUISD::IMAGE_GATHER4_L;
case Intrinsic::amdgcn_image_gather4_b:
return AMDGPUISD::IMAGE_GATHER4_B;
case Intrinsic::amdgcn_image_gather4_b_cl:
return AMDGPUISD::IMAGE_GATHER4_B_CL;
case Intrinsic::amdgcn_image_gather4_lz:
return AMDGPUISD::IMAGE_GATHER4_LZ;

// Gather4 with comparison.
case Intrinsic::amdgcn_image_gather4_c:
return AMDGPUISD::IMAGE_GATHER4_C;
case Intrinsic::amdgcn_image_gather4_c_cl:
return AMDGPUISD::IMAGE_GATHER4_C_CL;
case Intrinsic::amdgcn_image_gather4_c_l:
return AMDGPUISD::IMAGE_GATHER4_C_L;
case Intrinsic::amdgcn_image_gather4_c_b:
return AMDGPUISD::IMAGE_GATHER4_C_B;
case Intrinsic::amdgcn_image_gather4_c_b_cl:
return AMDGPUISD::IMAGE_GATHER4_C_B_CL;
case Intrinsic::amdgcn_image_gather4_c_lz:
return AMDGPUISD::IMAGE_GATHER4_C_LZ;

// Gather4 with offsets.
case Intrinsic::amdgcn_image_gather4_o:
return AMDGPUISD::IMAGE_GATHER4_O;
case Intrinsic::amdgcn_image_gather4_cl_o:
return AMDGPUISD::IMAGE_GATHER4_CL_O;
case Intrinsic::amdgcn_image_gather4_l_o:
return AMDGPUISD::IMAGE_GATHER4_L_O;
case Intrinsic::amdgcn_image_gather4_b_o:
return AMDGPUISD::IMAGE_GATHER4_B_O;
case Intrinsic::amdgcn_image_gather4_b_cl_o:
return AMDGPUISD::IMAGE_GATHER4_B_CL_O;
case Intrinsic::amdgcn_image_gather4_lz_o:
return AMDGPUISD::IMAGE_GATHER4_LZ_O;

// Gather4 with comparison and offsets.
case Intrinsic::amdgcn_image_gather4_c_o:
return AMDGPUISD::IMAGE_GATHER4_C_O;
case Intrinsic::amdgcn_image_gather4_c_cl_o:
return AMDGPUISD::IMAGE_GATHER4_C_CL_O;
case Intrinsic::amdgcn_image_gather4_c_l_o:
return AMDGPUISD::IMAGE_GATHER4_C_L_O;
case Intrinsic::amdgcn_image_gather4_c_b_o:
return AMDGPUISD::IMAGE_GATHER4_C_B_O;
case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
return AMDGPUISD::IMAGE_GATHER4_C_B_CL_O;
case Intrinsic::amdgcn_image_gather4_c_lz_o:
return AMDGPUISD::IMAGE_GATHER4_C_LZ_O;

default:
break;
}
return 0;
}

static SDValue adjustLoadValueType(SDValue Result, EVT LoadVT, SDLoc DL,
SelectionDAG &DAG, bool Unpacked) {
if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
@ -3545,16 +3702,16 @@ SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op,
|
||||
switch (IID) {
|
||||
case Intrinsic::amdgcn_tbuffer_load: {
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0), // Chain
|
||||
Op.getOperand(2), // rsrc
|
||||
Op.getOperand(3), // vindex
|
||||
Op.getOperand(4), // voffset
|
||||
Op.getOperand(5), // soffset
|
||||
Op.getOperand(6), // offset
|
||||
Op.getOperand(7), // dfmt
|
||||
Op.getOperand(8), // nfmt
|
||||
Op.getOperand(9), // glc
|
||||
Op.getOperand(10) // slc
|
||||
Op.getOperand(0), // Chain
|
||||
Op.getOperand(2), // rsrc
|
||||
Op.getOperand(3), // vindex
|
||||
Op.getOperand(4), // voffset
|
||||
Op.getOperand(5), // soffset
|
||||
Op.getOperand(6), // offset
|
||||
Op.getOperand(7), // dfmt
|
||||
Op.getOperand(8), // nfmt
|
||||
Op.getOperand(9), // glc
|
||||
Op.getOperand(10) // slc
|
||||
};
|
||||
Res = DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, DL,
|
||||
VTList, Ops, M->getMemoryVT(),
|
||||
@ -3563,19 +3720,134 @@ SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op,
|
||||
return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
|
||||
}
|
||||
  case Intrinsic::amdgcn_buffer_load_format: {
    SDValue Ops[] = {
      Op.getOperand(0), // Chain
      Op.getOperand(2), // rsrc
      Op.getOperand(3), // vindex
      Op.getOperand(4), // offset
      Op.getOperand(5), // glc
      Op.getOperand(6)  // slc
    };
    Res = DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
                                  DL, VTList, Ops, M->getMemoryVT(),
                                  M->getMemOperand());
    Chain = Res.getValue(1);
    return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
  }
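// For reference (an assumption for illustration: the exact d16 overload
// mangling is not shown anywhere in this patch), the IR this case lowers
// would look like the following, with operands mirroring the Ops array above:
//   %data = call <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(
//             <4 x i32> %rsrc, i32 %vindex, i32 %offset, i1 false, i1 false)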
  case Intrinsic::amdgcn_image_load:
  case Intrinsic::amdgcn_image_load_mip: {
    SDValue Ops[] = {
      Op.getOperand(0), // Chain
      Op.getOperand(2), // vaddr
      Op.getOperand(3), // rsrc
      Op.getOperand(4), // dmask
      Op.getOperand(5), // glc
      Op.getOperand(6), // slc
      Op.getOperand(7), // lwe
      Op.getOperand(8)  // da
    };
    unsigned Opc = getImageOpcode(IID);
    Res = DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, M->getMemoryVT(),
                                  M->getMemOperand());
    Chain = Res.getValue(1);
    return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
  }
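// For reference, the IR this case handles (signature taken verbatim from the
// tests added by this patch):
//   %tex = call <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(
//            <4 x i32> %coords, <8 x i32> %rsrc, i32 3,
//            i1 false, i1 false, i1 false, i1 false)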
  // Basic sample.
  case Intrinsic::amdgcn_image_sample:
  case Intrinsic::amdgcn_image_sample_cl:
  case Intrinsic::amdgcn_image_sample_d:
  case Intrinsic::amdgcn_image_sample_d_cl:
  case Intrinsic::amdgcn_image_sample_l:
  case Intrinsic::amdgcn_image_sample_b:
  case Intrinsic::amdgcn_image_sample_b_cl:
  case Intrinsic::amdgcn_image_sample_lz:
  case Intrinsic::amdgcn_image_sample_cd:
  case Intrinsic::amdgcn_image_sample_cd_cl:

  // Sample with comparison.
  case Intrinsic::amdgcn_image_sample_c:
  case Intrinsic::amdgcn_image_sample_c_cl:
  case Intrinsic::amdgcn_image_sample_c_d:
  case Intrinsic::amdgcn_image_sample_c_d_cl:
  case Intrinsic::amdgcn_image_sample_c_l:
  case Intrinsic::amdgcn_image_sample_c_b:
  case Intrinsic::amdgcn_image_sample_c_b_cl:
  case Intrinsic::amdgcn_image_sample_c_lz:
  case Intrinsic::amdgcn_image_sample_c_cd:
  case Intrinsic::amdgcn_image_sample_c_cd_cl:

  // Sample with offsets.
  case Intrinsic::amdgcn_image_sample_o:
  case Intrinsic::amdgcn_image_sample_cl_o:
  case Intrinsic::amdgcn_image_sample_d_o:
  case Intrinsic::amdgcn_image_sample_d_cl_o:
  case Intrinsic::amdgcn_image_sample_l_o:
  case Intrinsic::amdgcn_image_sample_b_o:
  case Intrinsic::amdgcn_image_sample_b_cl_o:
  case Intrinsic::amdgcn_image_sample_lz_o:
  case Intrinsic::amdgcn_image_sample_cd_o:
  case Intrinsic::amdgcn_image_sample_cd_cl_o:

  // Sample with comparison and offsets.
  case Intrinsic::amdgcn_image_sample_c_o:
  case Intrinsic::amdgcn_image_sample_c_cl_o:
  case Intrinsic::amdgcn_image_sample_c_d_o:
  case Intrinsic::amdgcn_image_sample_c_d_cl_o:
  case Intrinsic::amdgcn_image_sample_c_l_o:
  case Intrinsic::amdgcn_image_sample_c_b_o:
  case Intrinsic::amdgcn_image_sample_c_b_cl_o:
  case Intrinsic::amdgcn_image_sample_c_lz_o:
  case Intrinsic::amdgcn_image_sample_c_cd_o:
  case Intrinsic::amdgcn_image_sample_c_cd_cl_o:

  // Basic gather4.
  case Intrinsic::amdgcn_image_gather4:
  case Intrinsic::amdgcn_image_gather4_cl:
  case Intrinsic::amdgcn_image_gather4_l:
  case Intrinsic::amdgcn_image_gather4_b:
  case Intrinsic::amdgcn_image_gather4_b_cl:
  case Intrinsic::amdgcn_image_gather4_lz:

  // Gather4 with comparison.
  case Intrinsic::amdgcn_image_gather4_c:
  case Intrinsic::amdgcn_image_gather4_c_cl:
  case Intrinsic::amdgcn_image_gather4_c_l:
  case Intrinsic::amdgcn_image_gather4_c_b:
  case Intrinsic::amdgcn_image_gather4_c_b_cl:
  case Intrinsic::amdgcn_image_gather4_c_lz:

  // Gather4 with offsets.
  case Intrinsic::amdgcn_image_gather4_o:
  case Intrinsic::amdgcn_image_gather4_cl_o:
  case Intrinsic::amdgcn_image_gather4_l_o:
  case Intrinsic::amdgcn_image_gather4_b_o:
  case Intrinsic::amdgcn_image_gather4_b_cl_o:
  case Intrinsic::amdgcn_image_gather4_lz_o:

  // Gather4 with comparison and offsets.
  case Intrinsic::amdgcn_image_gather4_c_o:
  case Intrinsic::amdgcn_image_gather4_c_cl_o:
  case Intrinsic::amdgcn_image_gather4_c_l_o:
  case Intrinsic::amdgcn_image_gather4_c_b_o:
  case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
  case Intrinsic::amdgcn_image_gather4_c_lz_o: {
    SDValue Ops[] = {
      Op.getOperand(0),  // Chain
      Op.getOperand(2),  // vaddr
      Op.getOperand(3),  // rsrc
      Op.getOperand(4),  // sampler
      Op.getOperand(5),  // dmask
      Op.getOperand(6),  // unorm
      Op.getOperand(7),  // glc
      Op.getOperand(8),  // slc
      Op.getOperand(9),  // lwe
      Op.getOperand(10)  // da
    };
    unsigned Opc = getImageOpcode(IID);
    Res = DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, M->getMemoryVT(),
                                  M->getMemOperand());
    Chain = Res.getValue(1);
    return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
  }
  default:
    return SDValue();
@ -4982,6 +5254,30 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                   M->getMemoryVT(), M->getMemOperand());
  }

  case Intrinsic::amdgcn_image_store:
  case Intrinsic::amdgcn_image_store_mip: {
    SDValue VData = Op.getOperand(2);
    bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
    if (IsD16)
      VData = handleD16VData(VData, DAG);
    SDValue Ops[] = {
      Chain,            // Chain
      VData,            // vdata
      Op.getOperand(3), // vaddr
      Op.getOperand(4), // rsrc
      Op.getOperand(5), // dmask
      Op.getOperand(6), // glc
      Op.getOperand(7), // slc
      Op.getOperand(8), // lwe
      Op.getOperand(9)  // da
    };
    unsigned Opc = (IntrinsicID == Intrinsic::amdgcn_image_store) ?
                   AMDGPUISD::IMAGE_STORE : AMDGPUISD::IMAGE_STORE_MIP;
    MemSDNode *M = cast<MemSDNode>(Op);
    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
                                   M->getMemoryVT(), M->getMemOperand());
  }
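// Note on handleD16VData (its body is not shown in this hunk; the behavior is
// inferred from the tests below): it bridges the two d16 register layouts for
// stores. Packed subtargets (gfx8.1/gfx9) bitcast the f16 vector to the
// equivalent i32 vector (v4f16 -> v2i32), while unpacked subtargets such as
// tonga widen each half into its own 32-bit lane, which is why the UNPACKED
// checks in the tests expect one 16-bit value per register.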

  default:
    return Op;
  }
@ -7101,7 +7397,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
  unsigned Opcode = Node->getMachineOpcode();

  if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
      !TII->isGather4(Opcode) && !TII->isD16(Opcode)) {
    return adjustWritemask(Node, DAG);
  }
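// Why d16 is excluded here (inferred; the patch does not state it): with d16,
// dmask bits no longer map one-to-one onto 32-bit result registers, since two
// halves can share a register on packed subtargets, so adjustWritemask's
// shrinking of the result register range would be unsound for d16 MIMG loads.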

@ -118,6 +118,9 @@ class InstSI <dag outs, dag ins, string asm = "",
  // This bit indicates that this is a packed VOP3P instruction.
  field bit IsPacked = 0;

  // This bit indicates that this is a D16 instruction.
  field bit D16 = 0;

  // These need to be kept in sync with the enum in SIInstrFlags.
  let TSFlags{0} = SALU;
  let TSFlags{1} = VALU;
@ -173,6 +176,8 @@ class InstSI <dag outs, dag ins, string asm = "",

  let TSFlags{49} = IsPacked;

  let TSFlags{50} = D16;

  let SchedRW = [Write32Bit];

  field bits<1> DisableSIDecoder = 0;
@ -247,6 +252,7 @@ class MIMGe <bits<7> op> : Enc64 {
  bits<1> tfe;
  bits<1> lwe;
  bits<1> slc;
  bits<1> d16 = 0;
  bits<8> vaddr;
  bits<7> srsrc;
  bits<7> ssamp;
@ -265,6 +271,7 @@ class MIMGe <bits<7> op> : Enc64 {
  let Inst{47-40} = vdata;
  let Inst{52-48} = srsrc{6-2};
  let Inst{57-53} = ssamp{6-2};
  let Inst{63} = d16;
}

class EXPe : Enc64 {

@ -456,6 +456,14 @@ public:
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isD16(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::D16;
  }

  bool isD16(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::D16;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

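// Usage note: PostISelFolding above queries the Opcode overload,
// TII->isD16(Opcode). Both variants simply test SIInstrFlags::D16, which is
// materialized from the new TSFlags{50} bit set in the InstSI format class
// earlier in this patch.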
@ -170,6 +170,134 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SDTImage_load : SDTypeProfile<1, 7,
  [
    SDTCisInt<1>,     // vaddr
    SDTCisInt<2>,     // rsrc
    SDTCisVT<3, i32>, // dmask
    SDTCisVT<4, i1>,  // glc
    SDTCisVT<5, i1>,  // slc
    SDTCisVT<6, i1>,  // lwe
    SDTCisVT<7, i1>   // da
  ]>;
def SIImage_load : SDNode<"AMDGPUISD::IMAGE_LOAD", SDTImage_load,
                          [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SIImage_load_mip : SDNode<"AMDGPUISD::IMAGE_LOAD_MIP", SDTImage_load,
                              [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SDTImage_store : SDTypeProfile<0, 8,
  [
    SDTCisInt<1>,     // vaddr
    SDTCisInt<2>,     // rsrc
    SDTCisVT<3, i32>, // dmask
    SDTCisVT<4, i1>,  // glc
    SDTCisVT<5, i1>,  // slc
    SDTCisVT<6, i1>,  // lwe
    SDTCisVT<7, i1>   // da
  ]>;
def SIImage_store : SDNode <"AMDGPUISD::IMAGE_STORE",
                            SDTImage_store,
                            [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIImage_store_mip : SDNode <"AMDGPUISD::IMAGE_STORE_MIP",
                                SDTImage_store,
                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

class SDTImage_sample<string opcode> : SDNode <opcode,
  SDTypeProfile<1, 9,
    [
      SDTCisFP<1>,        // vaddr
      SDTCisInt<2>,       // rsrc
      SDTCisVT<3, v4i32>, // sampler
      SDTCisVT<4, i32>,   // dmask
      SDTCisVT<5, i1>,    // unorm
      SDTCisVT<6, i1>,    // glc
      SDTCisVT<7, i1>,    // slc
      SDTCisVT<8, i1>,    // lwe
      SDTCisVT<9, i1>     // da
    ]>,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

// Basic sample.
def SIImage_sample : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE">;
def SIImage_sample_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL">;
def SIImage_sample_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D">;
def SIImage_sample_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL">;
def SIImage_sample_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L">;
def SIImage_sample_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B">;
def SIImage_sample_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL">;
def SIImage_sample_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ">;
def SIImage_sample_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD">;
def SIImage_sample_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL">;

// Sample with comparison.
def SIImage_sample_c : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C">;
def SIImage_sample_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL">;
def SIImage_sample_c_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D">;
def SIImage_sample_c_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL">;
def SIImage_sample_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L">;
def SIImage_sample_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B">;
def SIImage_sample_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL">;
def SIImage_sample_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ">;
def SIImage_sample_c_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD">;
def SIImage_sample_c_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL">;

// Sample with offsets.
def SIImage_sample_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_O">;
def SIImage_sample_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL_O">;
def SIImage_sample_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_O">;
def SIImage_sample_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL_O">;
def SIImage_sample_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L_O">;
def SIImage_sample_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_O">;
def SIImage_sample_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL_O">;
def SIImage_sample_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ_O">;
def SIImage_sample_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_O">;
def SIImage_sample_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL_O">;

// Sample with comparison and offsets.
def SIImage_sample_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_O">;
def SIImage_sample_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL_O">;
def SIImage_sample_c_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_O">;
def SIImage_sample_c_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O">;
def SIImage_sample_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L_O">;
def SIImage_sample_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_O">;
def SIImage_sample_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O">;
def SIImage_sample_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ_O">;
def SIImage_sample_c_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_O">;
def SIImage_sample_c_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O">;

// Basic gather4.
def SIImage_gather4 : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4">;
def SIImage_gather4_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL">;
def SIImage_gather4_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L">;
def SIImage_gather4_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B">;
def SIImage_gather4_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL">;
def SIImage_gather4_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ">;

// Gather4 with comparison.
def SIImage_gather4_c : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C">;
def SIImage_gather4_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL">;
def SIImage_gather4_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L">;
def SIImage_gather4_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B">;
def SIImage_gather4_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL">;
def SIImage_gather4_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ">;

// Gather4 with offsets.
def SIImage_gather4_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_O">;
def SIImage_gather4_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL_O">;
def SIImage_gather4_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L_O">;
def SIImage_gather4_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_O">;
def SIImage_gather4_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL_O">;
def SIImage_gather4_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ_O">;

// Gather4 with comparison and offsets.
def SIImage_gather4_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_O">;
def SIImage_gather4_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL_O">;
def SIImage_gather4_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L_O">;
def SIImage_gather4_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_O">;
def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">;
def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">;

class SDSample<string opcode> : SDNode <opcode,
  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
                       SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
125  test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll  Normal file
@ -0,0 +1,125 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s

; GCN-LABEL: {{^}}image_load_f16:
; GCN: image_load v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16
define amdgpu_ps half @image_load_f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  %tex = call half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
  ret half %tex
}

; GCN-LABEL: {{^}}image_load_v2f16:
; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]

; PACKED: image_load v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @image_load_v2f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  %tex = call <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
  %elt = extractelement <2 x half> %tex, i32 1
  ret half %elt
}

; GCN-LABEL: {{^}}image_load_v4f16:
; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]

; PACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @image_load_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  %elt = extractelement <4 x half> %tex, i32 3
  ret half %elt
}

; GCN-LABEL: {{^}}image_load_mip_v4f16:
; UNPACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]

; PACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @image_load_mip_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  %elt = extractelement <4 x half> %tex, i32 3
  ret half %elt
}

; GCN-LABEL: {{^}}image_store_f16:
; GCN: v_trunc_f16_e32 v[[LO:[0-9]+]], s{{[0-9]+}}
; GCN: image_store v[[LO]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16
define amdgpu_kernel void @image_store_f16(half %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  call void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
  ret void
}

; GCN-LABEL: {{^}}image_store_v2f16:

; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: image_store v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16

; PACKED: image_store v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16
define amdgpu_kernel void @image_store_v2f16(<2 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  call void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
  ret void
}

; GCN-LABEL: {{^}}image_store_v4f16:

; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: image_store v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16

; GFX81: v_or_b32_e32 v[[HI:[0-9]+]]
; GFX81: v_or_b32_e32 v[[LO:[0-9]+]]

; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]]
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]]

; PACKED: image_store v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16
define amdgpu_kernel void @image_store_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  call void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}

; GCN-LABEL: {{^}}image_store_mip_v4f16:

; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
; UNPACKED: image_store_mip v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16

; GFX81: v_or_b32_e32 v[[HI:[0-9]+]]
; GFX81: v_or_b32_e32 v[[LO:[0-9]+]]

; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]]
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]]

; PACKED: image_store_mip v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16
define amdgpu_kernel void @image_store_mip_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) {
main_body:
  call void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}


declare half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
declare <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)

declare void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
declare void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
declare void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
declare void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1)
137  test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll  Normal file
@ -0,0 +1,137 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s


; GCN-LABEL: {{^}}image_gather4_f16:
; GCN: image_gather4 v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16

; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_gather4_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call half @llvm.amdgcn.image.gather4.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0)
  store half %tex, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_gather4_v2f16:
; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_gather4 v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16

; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off
define amdgpu_kernel void @image_gather4_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <2 x half> @llvm.amdgcn.image.gather4.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <2 x half> %tex, i32 1
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_gather4_v4f16:
; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_gather4_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_gather4_cl_v4f16:
; UNPACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_gather4_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_gather4_c_v4f16:
; UNPACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_gather4_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_gather4_o_v4f16:
; UNPACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_gather4_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_gather4_c_o_v4f16:
; UNPACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_gather4_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

declare half @llvm.amdgcn.image.gather4.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <2 x half> @llvm.amdgcn.image.gather4.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)


declare <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
135  test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll  Normal file
@ -0,0 +1,135 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s


; GCN-LABEL: {{^}}image_sample_f16:
; GCN: image_sample v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16

; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0)
  store half %tex, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_sample_v2f16:
; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16

; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off
define amdgpu_kernel void @image_sample_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <2 x half> %tex, i32 1
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_sample_v4f16:
; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_sample_cl_v4f16:
; UNPACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_sample_c_v4f16:
; UNPACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_sample_o_v4f16:
; UNPACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]
; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}image_sample_c_o_v4f16:
; UNPACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]
; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

declare half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)


declare <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)