mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
AMDGPU: Tidy minor td file issues
Make comments and indentation more consistent. Rearrange a few things to be in a more consistent order, such as separating subtarget features that describe an actual device property from those used as options. llvm-svn: 258789
This commit is contained in:
parent
51a14cbbc7
commit
f5c703425b
@ -1,183 +1,115 @@
|
||||
//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
|
||||
//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//===------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/Target.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget Features
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Debugging Features
|
||||
|
||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||
"DumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter">;
|
||||
|
||||
def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
|
||||
"DumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter">;
|
||||
|
||||
def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
|
||||
"EnableIRStructurizer",
|
||||
"false",
|
||||
"Disable IR Structurizer">;
|
||||
|
||||
def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
|
||||
"EnablePromoteAlloca",
|
||||
"true",
|
||||
"Enable promote alloca pass">;
|
||||
|
||||
// Target features
|
||||
|
||||
def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
|
||||
"EnableIfCvt",
|
||||
"false",
|
||||
"Disable the if conversion pass">;
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (device properties)
|
||||
//===------------------------------------------------------------===//
|
||||
|
||||
def FeatureFP64 : SubtargetFeature<"fp64",
|
||||
"FP64",
|
||||
"true",
|
||||
"Enable double precision operations">;
|
||||
|
||||
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
|
||||
"FP64Denormals",
|
||||
"true",
|
||||
"Enable double precision denormal handling",
|
||||
[FeatureFP64]>;
|
||||
"FP64",
|
||||
"true",
|
||||
"Enable double precision operations"
|
||||
>;
|
||||
|
||||
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
|
||||
"FastFMAF32",
|
||||
"true",
|
||||
"Assuming f32 fma is at least as fast as mul + add",
|
||||
[]>;
|
||||
"FastFMAF32",
|
||||
"true",
|
||||
"Assuming f32 fma is at least as fast as mul + add"
|
||||
>;
|
||||
|
||||
def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
|
||||
"HalfRate64Ops",
|
||||
"true",
|
||||
"Most fp64 instructions are half rate instead of quarter",
|
||||
[]>;
|
||||
|
||||
// Some instructions do not support denormals despite this flag. Using
|
||||
// fp32 denormals also causes instructions to run at the double
|
||||
// precision rate for the device.
|
||||
def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
|
||||
"FP32Denormals",
|
||||
"true",
|
||||
"Enable single precision denormal handling">;
|
||||
"HalfRate64Ops",
|
||||
"true",
|
||||
"Most fp64 instructions are half rate instead of quarter"
|
||||
>;
|
||||
|
||||
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
|
||||
"R600ALUInst",
|
||||
"false",
|
||||
"Older version of ALU instructions encoding">;
|
||||
"R600ALUInst",
|
||||
"false",
|
||||
"Older version of ALU instructions encoding"
|
||||
>;
|
||||
|
||||
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
|
||||
"HasVertexCache",
|
||||
"true",
|
||||
"Specify use of dedicated vertex cache">;
|
||||
"HasVertexCache",
|
||||
"true",
|
||||
"Specify use of dedicated vertex cache"
|
||||
>;
|
||||
|
||||
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
|
||||
"CaymanISA",
|
||||
"true",
|
||||
"Use Cayman ISA">;
|
||||
"CaymanISA",
|
||||
"true",
|
||||
"Use Cayman ISA"
|
||||
>;
|
||||
|
||||
def FeatureCFALUBug : SubtargetFeature<"cfalubug",
|
||||
"CFALUBug",
|
||||
"true",
|
||||
"GPU has CF_ALU bug">;
|
||||
|
||||
// XXX - This should probably be removed once enabled by default
|
||||
def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
|
||||
"EnableLoadStoreOpt",
|
||||
"true",
|
||||
"Enable SI load/store optimizer pass">;
|
||||
|
||||
// Performance debugging feature. Allow using DS instruction immediate
|
||||
// offsets even if the base pointer can't be proven to be base. On SI,
|
||||
// base pointer values that won't give the same result as a 16-bit add
|
||||
// are not safe to fold, but this will override the conservative test
|
||||
// for the base pointer.
|
||||
def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding",
|
||||
"EnableUnsafeDSOffsetFolding",
|
||||
"true",
|
||||
"Force using DS instruction immediate offsets on SI">;
|
||||
|
||||
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
|
||||
"FlatForGlobal",
|
||||
"true",
|
||||
"Force to generate flat instruction for global">;
|
||||
"CFALUBug",
|
||||
"true",
|
||||
"GPU has CF_ALU bug"
|
||||
>;
|
||||
|
||||
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
|
||||
"FlatAddressSpace",
|
||||
"true",
|
||||
"Support flat address space">;
|
||||
"FlatAddressSpace",
|
||||
"true",
|
||||
"Support flat address space"
|
||||
>;
|
||||
|
||||
def FeatureXNACK : SubtargetFeature<"xnack",
|
||||
"EnableXNACK",
|
||||
"true",
|
||||
"Enable XNACK support">;
|
||||
|
||||
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
|
||||
"EnableVGPRSpilling",
|
||||
"true",
|
||||
"Enable spilling of VGPRs to scratch memory">;
|
||||
"EnableXNACK",
|
||||
"true",
|
||||
"Enable XNACK support"
|
||||
>;
|
||||
|
||||
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
|
||||
"SGPRInitBug",
|
||||
"true",
|
||||
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
|
||||
|
||||
def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
|
||||
"EnableHugeScratchBuffer",
|
||||
"true",
|
||||
"Enable scratch buffer sizes greater than 128 GB">;
|
||||
|
||||
def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
|
||||
"EnableSIScheduler",
|
||||
"true",
|
||||
"Enable SI Machine Scheduler">;
|
||||
"SGPRInitBug",
|
||||
"true",
|
||||
"VI SGPR initilization bug requiring a fixed SGPR allocation size"
|
||||
>;
|
||||
|
||||
class SubtargetFeatureFetchLimit <string Value> :
|
||||
SubtargetFeature <"fetch"#Value,
|
||||
"TexVTXClauseSize",
|
||||
Value,
|
||||
"Limit the maximum number of fetches in a clause to "#Value>;
|
||||
"TexVTXClauseSize",
|
||||
Value,
|
||||
"Limit the maximum number of fetches in a clause to "#Value
|
||||
>;
|
||||
|
||||
def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
|
||||
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
|
||||
|
||||
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
|
||||
"wavefrontsize"#Value,
|
||||
"WavefrontSize",
|
||||
!cast<string>(Value),
|
||||
"The number of threads per wavefront">;
|
||||
"wavefrontsize"#Value,
|
||||
"WavefrontSize",
|
||||
!cast<string>(Value),
|
||||
"The number of threads per wavefront"
|
||||
>;
|
||||
|
||||
def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
|
||||
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
|
||||
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
|
||||
|
||||
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
|
||||
"ldsbankcount"#Value,
|
||||
"LDSBankCount",
|
||||
!cast<string>(Value),
|
||||
"The number of LDS banks per compute unit.">;
|
||||
"ldsbankcount"#Value,
|
||||
"LDSBankCount",
|
||||
!cast<string>(Value),
|
||||
"The number of LDS banks per compute unit."
|
||||
>;
|
||||
|
||||
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
|
||||
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
|
||||
|
||||
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
|
||||
: SubtargetFeature <
|
||||
"isaver"#Major#"."#Minor#"."#Stepping,
|
||||
"IsaVersion",
|
||||
"ISAVersion"#Major#"_"#Minor#"_"#Stepping,
|
||||
"Instruction set version number"
|
||||
"isaver"#Major#"."#Minor#"."#Stepping,
|
||||
"IsaVersion",
|
||||
"ISAVersion"#Major#"_"#Minor#"_"#Stepping,
|
||||
"Instruction set version number"
|
||||
>;
|
||||
|
||||
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
|
||||
@ -187,36 +119,135 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
|
||||
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;
|
||||
|
||||
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
|
||||
"localmemorysize"#Value,
|
||||
"LocalMemorySize",
|
||||
!cast<string>(Value),
|
||||
"The size of local memory in bytes">;
|
||||
"localmemorysize"#Value,
|
||||
"LocalMemorySize",
|
||||
!cast<string>(Value),
|
||||
"The size of local memory in bytes"
|
||||
>;
|
||||
|
||||
def FeatureGCN : SubtargetFeature<"gcn",
|
||||
"IsGCN",
|
||||
"true",
|
||||
"GCN or newer GPU">;
|
||||
"IsGCN",
|
||||
"true",
|
||||
"GCN or newer GPU"
|
||||
>;
|
||||
|
||||
def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
|
||||
"GCN1Encoding",
|
||||
"true",
|
||||
"Encoding format for SI and CI">;
|
||||
"GCN1Encoding",
|
||||
"true",
|
||||
"Encoding format for SI and CI"
|
||||
>;
|
||||
|
||||
def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
|
||||
"GCN3Encoding",
|
||||
"true",
|
||||
"Encoding format for VI">;
|
||||
"GCN3Encoding",
|
||||
"true",
|
||||
"Encoding format for VI"
|
||||
>;
|
||||
|
||||
def FeatureCIInsts : SubtargetFeature<"ci-insts",
|
||||
"CIInsts",
|
||||
"true",
|
||||
"Additional intstructions for CI+">;
|
||||
"CIInsts",
|
||||
"true",
|
||||
"Additional intstructions for CI+"
|
||||
>;
|
||||
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
|
||||
// Some instructions do not support denormals despite this flag. Using
|
||||
// fp32 denormals also causes instructions to run at the double
|
||||
// precision rate for the device.
|
||||
def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
|
||||
"FP32Denormals",
|
||||
"true",
|
||||
"Enable single precision denormal handling"
|
||||
>;
|
||||
|
||||
def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
|
||||
"FP64Denormals",
|
||||
"true",
|
||||
"Enable double precision denormal handling",
|
||||
[FeatureFP64]
|
||||
>;
|
||||
|
||||
def FeatureEnableHugeScratchBuffer : SubtargetFeature<
|
||||
"huge-scratch-buffer",
|
||||
"EnableHugeScratchBuffer",
|
||||
"true",
|
||||
"Enable scratch buffer sizes greater than 128 GB"
|
||||
>;
|
||||
|
||||
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
|
||||
"EnableVGPRSpilling",
|
||||
"true",
|
||||
"Enable spilling of VGPRs to scratch memory"
|
||||
>;
|
||||
|
||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||
"DumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter"
|
||||
>;
|
||||
|
||||
def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
|
||||
"DumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter"
|
||||
>;
|
||||
|
||||
def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
|
||||
"EnableIRStructurizer",
|
||||
"false",
|
||||
"Disable IR Structurizer"
|
||||
>;
|
||||
|
||||
def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
|
||||
"EnablePromoteAlloca",
|
||||
"true",
|
||||
"Enable promote alloca pass"
|
||||
>;
|
||||
|
||||
// XXX - This should probably be removed once enabled by default
|
||||
def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
|
||||
"EnableLoadStoreOpt",
|
||||
"true",
|
||||
"Enable SI load/store optimizer pass"
|
||||
>;
|
||||
|
||||
// Performance debugging feature. Allow using DS instruction immediate
|
||||
// offsets even if the base pointer can't be proven to be base. On SI,
|
||||
// base pointer values that won't give the same result as a 16-bit add
|
||||
// are not safe to fold, but this will override the conservative test
|
||||
// for the base pointer.
|
||||
def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
|
||||
"unsafe-ds-offset-folding",
|
||||
"EnableUnsafeDSOffsetFolding",
|
||||
"true",
|
||||
"Force using DS instruction immediate offsets on SI"
|
||||
>;
|
||||
|
||||
def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
|
||||
"EnableIfCvt",
|
||||
"false",
|
||||
"Disable the if conversion pass"
|
||||
>;
|
||||
|
||||
def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
|
||||
"EnableSIScheduler",
|
||||
"true",
|
||||
"Enable SI Machine Scheduler"
|
||||
>;
|
||||
|
||||
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
|
||||
"FlatForGlobal",
|
||||
"true",
|
||||
"Force to generate flat instruction for global"
|
||||
>;
|
||||
|
||||
// Dummy feature used to disable assembler instructions.
|
||||
def FeatureDisable : SubtargetFeature<"",
|
||||
"FeatureDisable","true",
|
||||
"Dummy feature to disable assembler"
|
||||
" instructions">;
|
||||
"FeatureDisable","true",
|
||||
"Dummy feature to disable assembler instructions"
|
||||
>;
|
||||
|
||||
class SubtargetFeatureGeneration <string Value,
|
||||
list<SubtargetFeature> Implies> :
|
||||
@ -228,33 +259,39 @@ def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
|
||||
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
|
||||
|
||||
def FeatureR600 : SubtargetFeatureGeneration<"R600",
|
||||
[FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
|
||||
[FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
|
||||
>;
|
||||
|
||||
def FeatureR700 : SubtargetFeatureGeneration<"R700",
|
||||
[FeatureFetchLimit16, FeatureLocalMemorySize0]>;
|
||||
[FeatureFetchLimit16, FeatureLocalMemorySize0]
|
||||
>;
|
||||
|
||||
def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
|
||||
[FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
|
||||
[FeatureFetchLimit16, FeatureLocalMemorySize32768]
|
||||
>;
|
||||
|
||||
def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
|
||||
[FeatureFetchLimit16, FeatureWavefrontSize64,
|
||||
FeatureLocalMemorySize32768]
|
||||
[FeatureFetchLimit16, FeatureWavefrontSize64,
|
||||
FeatureLocalMemorySize32768]
|
||||
>;
|
||||
|
||||
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize32768,
|
||||
FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
|
||||
FeatureLDSBankCount32]>;
|
||||
[FeatureFP64, FeatureLocalMemorySize32768,
|
||||
FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
|
||||
FeatureLDSBankCount32]
|
||||
>;
|
||||
|
||||
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
|
||||
FeatureGCN1Encoding, FeatureCIInsts]>;
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
|
||||
FeatureGCN1Encoding, FeatureCIInsts]
|
||||
>;
|
||||
|
||||
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||
FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
|
||||
[FeatureFP64, FeatureLocalMemorySize65536,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||
FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -284,6 +321,7 @@ def NullALU : InstrItinClass;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def TruePredicate : Predicate<"true">;
|
||||
|
||||
def isSICI : Predicate<
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
|
||||
@ -293,6 +331,13 @@ def isVI : Predicate <
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
|
||||
AssemblerPredicate<"FeatureGCN3Encoding">;
|
||||
|
||||
def isCIVI : Predicate <
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
|
||||
>, AssemblerPredicate<"FeatureCIInsts">;
|
||||
|
||||
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
|
||||
|
||||
class PredicateControl {
|
||||
Predicate SubtargetPredicate;
|
||||
Predicate SIAssemblerPredicate = isSICI;
|
||||
|
@ -587,13 +587,6 @@ def cvt_flr_i32_f32 : PatFrag <
|
||||
[{ (void)N; return TM.Options.NoNaNsFPMath; }]
|
||||
>;
|
||||
|
||||
/*
|
||||
class UMUL24Pattern <Instruction UMUL24> : Pat <
|
||||
(mul U24:$x, U24:$y),
|
||||
(UMUL24 $x, $y)
|
||||
>;
|
||||
*/
|
||||
|
||||
class IMad24Pat<Instruction Inst> : Pat <
|
||||
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
|
||||
(Inst $src0, $src1, $src2)
|
||||
|
@ -25,14 +25,6 @@
|
||||
// BUFFER_LOAD_DWORDX3
|
||||
// BUFFER_STORE_DWORDX3
|
||||
|
||||
|
||||
def isCIVI : Predicate <
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
|
||||
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
|
||||
>, AssemblerPredicate<"FeatureCIInsts">;
|
||||
|
||||
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP1 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -262,7 +254,7 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
|
||||
flat<0x60>, "flat_atomic_fmax_x2", VReg_64
|
||||
>;
|
||||
|
||||
} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
|
||||
} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
|
||||
|
||||
let Predicates = [isCI] in {
|
||||
|
||||
@ -289,7 +281,7 @@ def : Pat <
|
||||
|
||||
let Predicates = [isCIVI] in {
|
||||
|
||||
// Patterns for global loads with no offset
|
||||
// Patterns for global loads with no offset.
|
||||
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr)),
|
||||
(inst $addr, 0, 0, 0)
|
||||
|
@ -101,7 +101,7 @@ let isMoveImm = 1 in {
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
|
||||
defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
|
||||
} // let isRematerializeable = 1
|
||||
} // End isReMaterializable = 1, isAsCheapAsAMove = 1
|
||||
|
||||
let Uses = [SCC] in {
|
||||
defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
|
||||
@ -1234,7 +1234,7 @@ defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
|
||||
VOP_F64_I32, uint_to_fp
|
||||
>;
|
||||
|
||||
} // let SchedRW = [WriteQuarterRate32]
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
|
||||
VOP_F32_F32, AMDGPUfract
|
||||
@ -1270,7 +1270,7 @@ defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
|
||||
VOP_F32_F32, AMDGPUrsq
|
||||
>;
|
||||
|
||||
} //let SchedRW = [WriteQuarterRate32]
|
||||
} // End SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
|
||||
@ -1281,7 +1281,7 @@ defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
|
||||
VOP_F64_F64, AMDGPUrsq
|
||||
>;
|
||||
|
||||
} // let SchedRW = [WriteDouble];
|
||||
} // End SchedRW = [WriteDouble]
|
||||
|
||||
defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
|
||||
VOP_F32_F32, fsqrt
|
||||
@ -1710,7 +1710,7 @@ defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
|
||||
defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
|
||||
VOP_I32_I32_I32_I32
|
||||
>;
|
||||
////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
|
||||
//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
|
||||
defm V_DIV_FIXUP_F32 : VOP3Inst <
|
||||
vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
|
||||
>;
|
||||
@ -1740,13 +1740,13 @@ defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64",
|
||||
VOP_F64_F64_F64, fmaxnum
|
||||
>;
|
||||
|
||||
} // isCommutable = 1
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
|
||||
VOP_F64_F64_I32, AMDGPUldexp
|
||||
>;
|
||||
|
||||
} // let SchedRW = [WriteDoubleAdd]
|
||||
} // End SchedRW = [WriteDoubleAdd]
|
||||
|
||||
let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
|
||||
|
||||
@ -1764,7 +1764,7 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
|
||||
VOP_I32_I32_I32, mulhs
|
||||
>;
|
||||
|
||||
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
|
||||
} // End isCommutable = 1, SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteFloatFMA, WriteSALU] in {
|
||||
defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
|
||||
@ -1777,7 +1777,7 @@ let SchedRW = [WriteDouble, WriteSALU] in {
|
||||
defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
|
||||
VOP3b_F64_I1_F64_F64_F64
|
||||
>;
|
||||
} // let SchedRW = [WriteDouble]
|
||||
} // End SchedRW = [WriteDouble, WriteSALU]
|
||||
|
||||
let isCommutable = 1, Uses = [VCC, EXEC] in {
|
||||
|
||||
@ -1814,7 +1814,7 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
|
||||
vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
|
||||
>;
|
||||
|
||||
} // let SchedRW = [WriteDouble]
|
||||
} // End SchedRW = [WriteDouble]
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
@ -1856,7 +1856,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
|
||||
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
|
||||
// pass to enable folding of inline immediates.
|
||||
def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
|
||||
} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
|
||||
} // End hasSideEffects = 0, mayLoad = 0, mayStore = 0
|
||||
|
||||
let hasSideEffects = 1, SALU = 1 in {
|
||||
def SGPR_USE : InstSI <(outs),(ins), "", []>;
|
||||
@ -1893,7 +1893,7 @@ def SI_LOOP : InstSI <
|
||||
[(int_amdgcn_loop i64:$saved, bb:$target)]
|
||||
>;
|
||||
|
||||
} // end isBranch = 1, isTerminator = 1
|
||||
} // End isBranch = 1, isTerminator = 1
|
||||
|
||||
def SI_BREAK : InstSI <
|
||||
(outs SReg_64:$dst),
|
||||
@ -1934,7 +1934,7 @@ def SI_KILL : InstSI <
|
||||
>;
|
||||
} // End Uses = [EXEC], Defs = [EXEC,VCC]
|
||||
|
||||
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
||||
} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
||||
|
||||
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
|
||||
|
||||
@ -1967,7 +1967,7 @@ def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
|
||||
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
|
||||
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
|
||||
|
||||
} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
|
||||
} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
|
||||
|
||||
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
|
||||
|
||||
@ -1975,8 +1975,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
|
||||
def _SAVE : InstSI <
|
||||
(outs),
|
||||
(ins sgpr_class:$src, i32imm:$frame_idx),
|
||||
"", []
|
||||
> {
|
||||
"", []> {
|
||||
let mayStore = 1;
|
||||
let mayLoad = 0;
|
||||
}
|
||||
@ -1984,8 +1983,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
|
||||
def _RESTORE : InstSI <
|
||||
(outs sgpr_class:$dst),
|
||||
(ins i32imm:$frame_idx),
|
||||
"", []
|
||||
> {
|
||||
"", []> {
|
||||
let mayStore = 0;
|
||||
let mayLoad = 1;
|
||||
}
|
||||
@ -2007,8 +2005,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
|
||||
(outs),
|
||||
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
|
||||
SReg_32:$scratch_offset),
|
||||
"", []
|
||||
> {
|
||||
"", []> {
|
||||
let mayStore = 1;
|
||||
let mayLoad = 0;
|
||||
}
|
||||
@ -2016,8 +2013,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
|
||||
def _RESTORE : InstSI <
|
||||
(outs vgpr_class:$dst),
|
||||
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
|
||||
"", []
|
||||
> {
|
||||
"", []> {
|
||||
let mayStore = 0;
|
||||
let mayLoad = 1;
|
||||
}
|
||||
@ -2043,9 +2039,9 @@ def SI_CONSTDATA_PTR : InstSI <
|
||||
|
||||
} // End Defs = [SCC]
|
||||
|
||||
} // end IsCodeGenOnly, isPseudo
|
||||
} // End isCodeGenOnly, isPseudo
|
||||
|
||||
} // end SubtargetPredicate = isGCN
|
||||
} // End SubtargetPredicate = isGCN
|
||||
|
||||
let Predicates = [isGCN] in {
|
||||
|
||||
@ -2060,7 +2056,6 @@ def : Pat<
|
||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
/* int_SI_export */
|
||||
def : Pat <
|
||||
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
|
||||
f32:$src0, f32:$src1, f32:$src2, f32:$src3),
|
||||
@ -2445,11 +2440,6 @@ class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type
|
||||
/********** Extraction, Insertion, Building and Casting **********/
|
||||
/********** ============================================ **********/
|
||||
|
||||
//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
|
||||
//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
|
||||
//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
|
||||
//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
|
||||
|
||||
foreach Index = 0-2 in {
|
||||
def Extract_Element_v2i32_#Index : Extract_Element <
|
||||
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||
@ -2514,46 +2504,45 @@ foreach Index = 0-15 in {
|
||||
>;
|
||||
}
|
||||
|
||||
def : BitConvert <i32, f32, SReg_32>;
|
||||
// FIXME: Why are only some of these type combinations defined for SReg and
|
||||
// VReg?
|
||||
// 32-bit bitcast
|
||||
def : BitConvert <i32, f32, VGPR_32>;
|
||||
|
||||
def : BitConvert <f32, i32, SReg_32>;
|
||||
def : BitConvert <f32, i32, VGPR_32>;
|
||||
def : BitConvert <i32, f32, SReg_32>;
|
||||
def : BitConvert <f32, i32, SReg_32>;
|
||||
|
||||
// 64-bit bitcast
|
||||
def : BitConvert <i64, f64, VReg_64>;
|
||||
|
||||
def : BitConvert <f64, i64, VReg_64>;
|
||||
|
||||
def : BitConvert <v2f32, v2i32, VReg_64>;
|
||||
def : BitConvert <v2i32, v2f32, VReg_64>;
|
||||
def : BitConvert <v2i32, i64, VReg_64>;
|
||||
def : BitConvert <v2f32, v2i32, VReg_64>;
|
||||
def : BitConvert <i64, v2i32, VReg_64>;
|
||||
def : BitConvert <v2f32, i64, VReg_64>;
|
||||
def : BitConvert <v2i32, i64, VReg_64>;
|
||||
def : BitConvert <i64, v2f32, VReg_64>;
|
||||
def : BitConvert <v2f32, f64, VReg_64>;
|
||||
def : BitConvert <v2i32, f64, VReg_64>;
|
||||
def : BitConvert <v2f32, i64, VReg_64>;
|
||||
def : BitConvert <f64, v2f32, VReg_64>;
|
||||
def : BitConvert <v2f32, f64, VReg_64>;
|
||||
def : BitConvert <f64, v2i32, VReg_64>;
|
||||
def : BitConvert <v4f32, v4i32, VReg_128>;
|
||||
def : BitConvert <v2i32, f64, VReg_64>;
|
||||
def : BitConvert <v4i32, v4f32, VReg_128>;
|
||||
def : BitConvert <v4f32, v4i32, VReg_128>;
|
||||
|
||||
|
||||
// 128-bit bitcast
|
||||
def : BitConvert <v2i64, v4i32, SReg_128>;
|
||||
def : BitConvert <v4i32, v2i64, SReg_128>;
|
||||
|
||||
def : BitConvert <v2f64, v4f32, VReg_128>;
|
||||
def : BitConvert <v2f64, v4i32, VReg_128>;
|
||||
def : BitConvert <v4f32, v2f64, VReg_128>;
|
||||
def : BitConvert <v4i32, v2f64, VReg_128>;
|
||||
|
||||
|
||||
|
||||
|
||||
def : BitConvert <v8f32, v8i32, SReg_256>;
|
||||
// 256-bit bitcast
|
||||
def : BitConvert <v8i32, v8f32, SReg_256>;
|
||||
def : BitConvert <v8f32, v8i32, SReg_256>;
|
||||
def : BitConvert <v8i32, v8f32, VReg_256>;
|
||||
def : BitConvert <v8f32, v8i32, VReg_256>;
|
||||
|
||||
// 512-bit bitcast
|
||||
def : BitConvert <v16i32, v16f32, VReg_512>;
|
||||
def : BitConvert <v16f32, v16i32, VReg_512>;
|
||||
|
||||
@ -2575,7 +2564,7 @@ def : Pat <
|
||||
|
||||
def : Pat <
|
||||
(fneg (fabs f32:$src)),
|
||||
(S_OR_B32 $src, 0x80000000) /* Set sign bit */
|
||||
(S_OR_B32 $src, 0x80000000) // Set sign bit
|
||||
>;
|
||||
|
||||
// FIXME: Should use S_OR_B32
|
||||
@ -2665,7 +2654,6 @@ def : Pat <
|
||||
/********** Intrinsic Patterns **********/
|
||||
/********** ================== **********/
|
||||
|
||||
/* llvm.AMDGPU.pow */
|
||||
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
|
||||
|
||||
def : Pat <
|
||||
@ -2702,7 +2690,7 @@ class Ext32Pat <SDNode ext> : Pat <
|
||||
def : Ext32Pat <zext>;
|
||||
def : Ext32Pat <anyext>;
|
||||
|
||||
// Offset in an 32Bit VGPR
|
||||
// Offset in a 32-bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
|
||||
@ -2934,22 +2922,6 @@ def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
|
||||
|
||||
/*
|
||||
class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
|
||||
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
|
||||
(Instr $value, $srsrc, $vaddr, $offset)
|
||||
>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
|
||||
def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
|
||||
def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
|
||||
def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
|
||||
def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
|
||||
} // End Predicates = [isSICI]
|
||||
|
||||
*/
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
Loading…
Reference in New Issue
Block a user