AMDGPU: Tidy minor td file issues

Make comments and indentation more consistent. Rearrange a few things into a more consistent order, such as separating subtarget features that describe an actual device property from those that are used as options. llvm-svn: 258789
2025-01-31 20:51:52 +01:00 · 2016-01-26 04:49:22 +00:00 · 2016-01-26 04:49:22 +00:00 · f5c703425b
commit f5c703425b
parent 51a14cbbc7
4 changed files with 249 additions and 247 deletions
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@ -1,183 +1,115 @@
-//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
+//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//

 include "llvm/Target/Target.td"

-//===----------------------------------------------------------------------===//
-// Subtarget Features
-//===----------------------------------------------------------------------===//
-
-// Debugging Features
-
-def FeatureDumpCode : SubtargetFeature <"DumpCode",
-        "DumpCode",
-        "true",
-        "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
-        "DumpCode",
-        "true",
-        "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
-        "EnableIRStructurizer",
-        "false",
-        "Disable IR Structurizer">;
-
-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
-        "EnablePromoteAlloca",
-        "true",
-        "Enable promote alloca pass">;
-
-// Target features
-
-def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
-        "EnableIfCvt",
-        "false",
-        "Disable the if conversion pass">;
+//===------------------------------------------------------------===//
+// Subtarget Features (device properties)
+//===------------------------------------------------------------===//

 def FeatureFP64 : SubtargetFeature<"fp64",
-        "FP64",
-        "true",
-        "Enable double precision operations">;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
-        "FP64Denormals",
-        "true",
-        "Enable double precision denormal handling",
-        [FeatureFP64]>;
+  "FP64",
+  "true",
+  "Enable double precision operations"
+>;

 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
-        "FastFMAF32",
-        "true",
-        "Assuming f32 fma is at least as fast as mul + add",
-        []>;
+  "FastFMAF32",
+  "true",
+  "Assuming f32 fma is at least as fast as mul + add"
+>;

 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
-        "HalfRate64Ops",
-        "true",
-        "Most fp64 instructions are half rate instead of quarter",
-        []>;
-
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
-        "FP32Denormals",
-        "true",
-        "Enable single precision denormal handling">;
+  "HalfRate64Ops",
+  "true",
+  "Most fp64 instructions are half rate instead of quarter"
+>;

 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-        "R600ALUInst",
-        "false",
-        "Older version of ALU instructions encoding">;
+  "R600ALUInst",
+  "false",
+  "Older version of ALU instructions encoding"
+>;

 def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
-        "HasVertexCache",
-        "true",
-        "Specify use of dedicated vertex cache">;
+  "HasVertexCache",
+  "true",
+  "Specify use of dedicated vertex cache"
+>;

 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
-        "CaymanISA",
-        "true",
-        "Use Cayman ISA">;
+  "CaymanISA",
+  "true",
+  "Use Cayman ISA"
+>;

 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
-        "CFALUBug",
-        "true",
-        "GPU has CF_ALU bug">;
-
-// XXX - This should probably be removed once enabled by default
-def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
-        "EnableLoadStoreOpt",
-        "true",
-        "Enable SI load/store optimizer pass">;
-
-// Performance debugging feature. Allow using DS instruction immediate
-// offsets even if the base pointer can't be proven to be base. On SI,
-// base pointer values that won't give the same result as a 16-bit add
-// are not safe to fold, but this will override the conservative test
-// for the base pointer.
-def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding",
-        "EnableUnsafeDSOffsetFolding",
-        "true",
-        "Force using DS instruction immediate offsets on SI">;
-
-def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
-        "FlatForGlobal",
-        "true",
-        "Force to generate flat instruction for global">;
+  "CFALUBug",
+  "true",
+  "GPU has CF_ALU bug"
+>;

 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
-        "FlatAddressSpace",
-        "true",
-        "Support flat address space">;
+  "FlatAddressSpace",
+  "true",
+  "Support flat address space"
+>;

 def FeatureXNACK : SubtargetFeature<"xnack",
-        "EnableXNACK",
-        "true",
-        "Enable XNACK support">;
-
-def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
-        "EnableVGPRSpilling",
-        "true",
-        "Enable spilling of VGPRs to scratch memory">;
+  "EnableXNACK",
+  "true",
+  "Enable XNACK support"
+>;

 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
-        "SGPRInitBug",
-        "true",
-        "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
-
-def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
-        "EnableHugeScratchBuffer",
-        "true",
-        "Enable scratch buffer sizes greater than 128 GB">;
-
-def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
-        "EnableSIScheduler",
-        "true",
-        "Enable SI Machine Scheduler">;
+  "SGPRInitBug",
+  "true",
+  "VI SGPR initilization bug requiring a fixed SGPR allocation size"
+>;

 class SubtargetFeatureFetchLimit <string Value> :
                          SubtargetFeature <"fetch"#Value,
-        "TexVTXClauseSize",
-        Value,
-        "Limit the maximum number of fetches in a clause to "#Value>;
+  "TexVTXClauseSize",
+  Value,
+  "Limit the maximum number of fetches in a clause to "#Value
+>;

 def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
 def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;

 class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
-        "wavefrontsize"#Value,
-        "WavefrontSize",
-        !cast<string>(Value),
-        "The number of threads per wavefront">;
+  "wavefrontsize"#Value,
+  "WavefrontSize",
+  !cast<string>(Value),
+  "The number of threads per wavefront"
+>;

 def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;

 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
-      "ldsbankcount"#Value,
-      "LDSBankCount",
-      !cast<string>(Value),
-      "The number of LDS banks per compute unit.">;
+  "ldsbankcount"#Value,
+  "LDSBankCount",
+  !cast<string>(Value),
+  "The number of LDS banks per compute unit."
+>;

 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

 class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
                                 : SubtargetFeature <
-      "isaver"#Major#"."#Minor#"."#Stepping,
-      "IsaVersion",
-      "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
-      "Instruction set version number"
+  "isaver"#Major#"."#Minor#"."#Stepping,
+  "IsaVersion",
+  "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
+  "Instruction set version number"
 >;

 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
@ -187,36 +119,135 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
 def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;

 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
-        "localmemorysize"#Value,
-        "LocalMemorySize",
-        !cast<string>(Value),
-        "The size of local memory in bytes">;
+  "localmemorysize"#Value,
+  "LocalMemorySize",
+  !cast<string>(Value),
+  "The size of local memory in bytes"
+>;

 def FeatureGCN : SubtargetFeature<"gcn",
-        "IsGCN",
-        "true",
-        "GCN or newer GPU">;
+  "IsGCN",
+  "true",
+  "GCN or newer GPU"
+>;

 def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
-        "GCN1Encoding",
-        "true",
-        "Encoding format for SI and CI">;
+  "GCN1Encoding",
+  "true",
+  "Encoding format for SI and CI"
+>;

 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
-        "GCN3Encoding",
-        "true",
-        "Encoding format for VI">;
+  "GCN3Encoding",
+  "true",
+  "Encoding format for VI"
+>;

 def FeatureCIInsts : SubtargetFeature<"ci-insts",
-        "CIInsts",
-        "true",
-        "Additional intstructions for CI+">;
+  "CIInsts",
+  "true",
+  "Additional intstructions for CI+"
+>;
+
+//===------------------------------------------------------------===//
+// Subtarget Features (options and debugging)
+//===------------------------------------------------------------===//
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+  "FP32Denormals",
+  "true",
+  "Enable single precision denormal handling"
+>;
+
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+  "FP64Denormals",
+  "true",
+  "Enable double precision denormal handling",
+  [FeatureFP64]
+>;
+
+def FeatureEnableHugeScratchBuffer : SubtargetFeature<
+  "huge-scratch-buffer",
+  "EnableHugeScratchBuffer",
+  "true",
+  "Enable scratch buffer sizes greater than 128 GB"
+>;
+
+def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
+  "EnableVGPRSpilling",
+  "true",
+  "Enable spilling of VGPRs to scratch memory"
+>;
+
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+  "DumpCode",
+  "true",
+  "Dump MachineInstrs in the CodeEmitter"
+>;
+
+def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
+  "DumpCode",
+  "true",
+  "Dump MachineInstrs in the CodeEmitter"
+>;
+
+def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
+  "EnableIRStructurizer",
+  "false",
+  "Disable IR Structurizer"
+>;
+
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+  "EnablePromoteAlloca",
+  "true",
+  "Enable promote alloca pass"
+>;
+
+// XXX - This should probably be removed once enabled by default
+def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
+  "EnableLoadStoreOpt",
+  "true",
+  "Enable SI load/store optimizer pass"
+>;
+
+// Performance debugging feature. Allow using DS instruction immediate
+// offsets even if the base pointer can't be proven to be base. On SI,
+// base pointer values that won't give the same result as a 16-bit add
+// are not safe to fold, but this will override the conservative test
+// for the base pointer.
+def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
+  "unsafe-ds-offset-folding",
+  "EnableUnsafeDSOffsetFolding",
+  "true",
+  "Force using DS instruction immediate offsets on SI"
+>;
+
+def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
+  "EnableIfCvt",
+  "false",
+  "Disable the if conversion pass"
+>;
+
+def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
+  "EnableSIScheduler",
+  "true",
+  "Enable SI Machine Scheduler"
+>;
+
+def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
+  "FlatForGlobal",
+  "true",
+  "Force to generate flat instruction for global"
+>;

 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
-                                      "FeatureDisable","true",
-                                      "Dummy feature to disable assembler"
-                                      " instructions">;
+  "FeatureDisable","true",
+  "Dummy feature to disable assembler instructions"
+>;

 class SubtargetFeatureGeneration <string Value,
                                  list<SubtargetFeature> Implies> :
@ -228,33 +259,39 @@ def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
 def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;

 def FeatureR600 : SubtargetFeatureGeneration<"R600",
-        [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
+  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
+>;

 def FeatureR700 : SubtargetFeatureGeneration<"R700",
-        [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
+  [FeatureFetchLimit16, FeatureLocalMemorySize0]
+>;

 def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
-        [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
+  [FeatureFetchLimit16, FeatureLocalMemorySize32768]
+>;

 def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
-        [FeatureFetchLimit16, FeatureWavefrontSize64,
-         FeatureLocalMemorySize32768]
+  [FeatureFetchLimit16, FeatureWavefrontSize64,
+   FeatureLocalMemorySize32768]
 >;

 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
-        [FeatureFP64, FeatureLocalMemorySize32768,
-         FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
-         FeatureLDSBankCount32]>;
+  [FeatureFP64, FeatureLocalMemorySize32768,
+  FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+  FeatureLDSBankCount32]
+>;

 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
-        [FeatureFP64, FeatureLocalMemorySize65536,
-         FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
-         FeatureGCN1Encoding, FeatureCIInsts]>;
+  [FeatureFP64, FeatureLocalMemorySize65536,
+  FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+  FeatureGCN1Encoding, FeatureCIInsts]
+>;

 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
-        [FeatureFP64, FeatureLocalMemorySize65536,
-         FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
-         FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
+  [FeatureFP64, FeatureLocalMemorySize65536,
+   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+   FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]
+>;

 //===----------------------------------------------------------------------===//

@ -284,6 +321,7 @@ def NullALU : InstrItinClass;
 //===----------------------------------------------------------------------===//

 def TruePredicate : Predicate<"true">;
+
 def isSICI : Predicate<
  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@ -293,6 +331,13 @@ def isVI : Predicate <
  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
  AssemblerPredicate<"FeatureGCN3Encoding">;

+def isCIVI : Predicate <
+  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
+  "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>, AssemblerPredicate<"FeatureCIInsts">;
+
+def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
+
 class PredicateControl {
  Predicate SubtargetPredicate;
  Predicate SIAssemblerPredicate = isSICI;
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@ -587,13 +587,6 @@ def cvt_flr_i32_f32 : PatFrag <
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
 >;

-/*
-class UMUL24Pattern <Instruction UMUL24> : Pat <
-  (mul U24:$x, U24:$y),
-  (UMUL24 $x, $y)
->;
-*/
-
 class IMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
--- a/lib/Target/AMDGPU/CIInstructions.td
+++ b/lib/Target/AMDGPU/CIInstructions.td
@ -25,14 +25,6 @@
 // BUFFER_LOAD_DWORDX3
 // BUFFER_STORE_DWORDX3

-
-def isCIVI : Predicate <
-  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
-  "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->, AssemblerPredicate<"FeatureCIInsts">;
-
-def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
-
 //===----------------------------------------------------------------------===//
 // VOP1 Instructions
 //===----------------------------------------------------------------------===//
@ -262,7 +254,7 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
  flat<0x60>, "flat_atomic_fmax_x2", VReg_64
 >;

-} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
+} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst

 let Predicates = [isCI] in {

@ -289,7 +281,7 @@ def : Pat <

 let Predicates = [isCIVI] in {

-// Patterns for global loads with no offset
+// Patterns for global loads with no offset.
 class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
  (vt (node i64:$addr)),
  (inst $addr, 0, 0, 0)
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@ -101,7 +101,7 @@ let isMoveImm = 1 in {
  let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
    defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
    defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
-  } // let isRematerializeable = 1
+  } // End isReMaterializable = 1, isAsCheapAsAMove = 1

  let Uses = [SCC] in {
    defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
@ -1234,7 +1234,7 @@ defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
  VOP_F64_I32, uint_to_fp
 >;

-} // let SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteQuarterRate32]

 defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
  VOP_F32_F32, AMDGPUfract
@ -1270,7 +1270,7 @@ defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
  VOP_F32_F32, AMDGPUrsq
 >;

-} //let SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteQuarterRate32]

 let SchedRW = [WriteDouble] in {

@ -1281,7 +1281,7 @@ defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
  VOP_F64_F64, AMDGPUrsq
 >;

-} // let SchedRW = [WriteDouble];
+} // End SchedRW = [WriteDouble]

 defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
  VOP_F32_F32, fsqrt
@ -1710,7 +1710,7 @@ defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
 defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
  VOP_I32_I32_I32_I32
 >;
-////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
+//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
 defm V_DIV_FIXUP_F32 : VOP3Inst <
  vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
 >;
@ -1740,13 +1740,13 @@ defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64",
  VOP_F64_F64_F64, fmaxnum
 >;

-} // isCommutable = 1
+} // End isCommutable = 1

 defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
  VOP_F64_F64_I32, AMDGPUldexp
 >;

-} // let SchedRW = [WriteDoubleAdd]
+} // End SchedRW = [WriteDoubleAdd]

 let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {

@ -1764,7 +1764,7 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
  VOP_I32_I32_I32, mulhs
 >;

-} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
+} // End isCommutable = 1, SchedRW = [WriteQuarterRate32]

 let SchedRW = [WriteFloatFMA, WriteSALU] in {
 defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
@ -1777,7 +1777,7 @@ let SchedRW = [WriteDouble, WriteSALU] in {
 defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
  VOP3b_F64_I1_F64_F64_F64
 >;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDouble, WriteSALU]

 let isCommutable = 1, Uses = [VCC, EXEC] in {

@ -1814,7 +1814,7 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
  vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
 >;

-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDouble]

 // These instructions only exist on SI and CI
 let SubtargetPredicate = isSICI in {
@ -1856,7 +1856,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
 // 64-bit vector move instruction.  This is mainly used by the SIFoldOperands
 // pass to enable folding of inline immediates.
 def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
-} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
+} // End hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]

 let hasSideEffects = 1, SALU = 1 in {
 def SGPR_USE : InstSI <(outs),(ins), "", []>;
@ -1893,7 +1893,7 @@ def SI_LOOP : InstSI <
  [(int_amdgcn_loop i64:$saved, bb:$target)]
 >;

-} // end isBranch = 1, isTerminator = 1
+} // End isBranch = 1, isTerminator = 1

 def SI_BREAK : InstSI <
  (outs SReg_64:$dst),
@ -1934,7 +1934,7 @@ def SI_KILL : InstSI <
 >;
 } // End Uses = [EXEC], Defs = [EXEC,VCC]

-} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1

 let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {

@ -1967,7 +1967,7 @@ def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
 def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
 def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;

-} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
+} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]

 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {

@ -1975,8 +1975,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
    def _SAVE : InstSI <
      (outs),
      (ins sgpr_class:$src, i32imm:$frame_idx),
-      "", []
-    > {
+      "", []> {
      let mayStore = 1;
      let mayLoad = 0;
    }
@ -1984,8 +1983,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
    def _RESTORE : InstSI <
      (outs sgpr_class:$dst),
      (ins i32imm:$frame_idx),
-      "", []
-    > {
+      "", []> {
      let mayStore = 0;
      let mayLoad = 1;
    }
@ -2007,8 +2005,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
      (outs),
      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
           SReg_32:$scratch_offset),
-      "", []
-    > {
+      "", []> {
      let mayStore = 1;
      let mayLoad = 0;
    }
@ -2016,8 +2013,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
    def _RESTORE : InstSI <
      (outs vgpr_class:$dst),
      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
-      "", []
-    > {
+      "", []> {
      let mayStore = 0;
      let mayLoad = 1;
    }
@ -2043,9 +2039,9 @@ def SI_CONSTDATA_PTR : InstSI <

 } // End Defs = [SCC]

-} // end IsCodeGenOnly, isPseudo
+} // End isCodeGenOnly, isPseudo

-} // end SubtargetPredicate = isGCN
+} // End SubtargetPredicate = isGCN

 let Predicates = [isGCN] in {

@ -2060,7 +2056,6 @@ def : Pat<
  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
 >;

-/* int_SI_export */
 def : Pat <
  (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
                 f32:$src0, f32:$src1, f32:$src2, f32:$src3),
@ -2445,11 +2440,6 @@ class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type
 /********** Extraction, Insertion, Building and Casting  **********/
 /********** ============================================ **********/

-//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
-//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
-//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
-//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
-
 foreach Index = 0-2 in {
  def Extract_Element_v2i32_#Index : Extract_Element <
    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
@ -2514,46 +2504,45 @@ foreach Index = 0-15 in {
  >;
 }

-def : BitConvert <i32, f32, SReg_32>;
+// FIXME: Why are only some of these type combinations defined for
+// SReg and VReg?
+// 32-bit bitcast
 def : BitConvert <i32, f32, VGPR_32>;
-
-def : BitConvert <f32, i32, SReg_32>;
 def : BitConvert <f32, i32, VGPR_32>;
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <f32, i32, SReg_32>;

+// 64-bit bitcast
 def : BitConvert <i64, f64, VReg_64>;
-
 def : BitConvert <f64, i64, VReg_64>;
-
-def : BitConvert <v2f32, v2i32, VReg_64>;
 def : BitConvert <v2i32, v2f32, VReg_64>;
-def : BitConvert <v2i32, i64, VReg_64>;
+def : BitConvert <v2f32, v2i32, VReg_64>;
 def : BitConvert <i64, v2i32, VReg_64>;
-def : BitConvert <v2f32, i64, VReg_64>;
+def : BitConvert <v2i32, i64, VReg_64>;
 def : BitConvert <i64, v2f32, VReg_64>;
-def : BitConvert <v2f32, f64, VReg_64>;
-def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <v2f32, i64, VReg_64>;
 def : BitConvert <f64, v2f32, VReg_64>;
+def : BitConvert <v2f32, f64, VReg_64>;
 def : BitConvert <f64, v2i32, VReg_64>;
-def : BitConvert <v4f32, v4i32, VReg_128>;
+def : BitConvert <v2i32, f64, VReg_64>;
 def : BitConvert <v4i32, v4f32, VReg_128>;
+def : BitConvert <v4f32, v4i32, VReg_128>;

-
+// 128-bit bitcast
 def : BitConvert <v2i64, v4i32, SReg_128>;
 def : BitConvert <v4i32, v2i64, SReg_128>;
-
 def : BitConvert <v2f64, v4f32, VReg_128>;
 def : BitConvert <v2f64, v4i32, VReg_128>;
 def : BitConvert <v4f32, v2f64, VReg_128>;
 def : BitConvert <v4i32, v2f64, VReg_128>;

-
-
-
-def : BitConvert <v8f32, v8i32, SReg_256>;
+// 256-bit bitcast
 def : BitConvert <v8i32, v8f32, SReg_256>;
+def : BitConvert <v8f32, v8i32, SReg_256>;
 def : BitConvert <v8i32, v8f32, VReg_256>;
 def : BitConvert <v8f32, v8i32, VReg_256>;

+// 512-bit bitcast
 def : BitConvert <v16i32, v16f32, VReg_512>;
 def : BitConvert <v16f32, v16i32, VReg_512>;

@ -2575,7 +2564,7 @@ def : Pat <

 def : Pat <
  (fneg (fabs f32:$src)),
-  (S_OR_B32 $src, 0x80000000) /* Set sign bit */
+  (S_OR_B32 $src, 0x80000000) // Set sign bit
 >;

 // FIXME: Should use S_OR_B32
@ -2665,7 +2654,6 @@ def : Pat <
 /********** Intrinsic Patterns **********/
 /********** ================== **********/

-/* llvm.AMDGPU.pow */
 def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;

 def : Pat <
@ -2702,7 +2690,7 @@ class Ext32Pat <SDNode ext> : Pat <
 def : Ext32Pat <zext>;
 def : Ext32Pat <anyext>;

-// Offset in an 32Bit VGPR
+// Offset in a 32-bit VGPR
 def : Pat <
  (SIload_constant v4i32:$sbase, i32:$voff),
  (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
@ -2934,22 +2922,6 @@ def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
 def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
 def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;

-/*
-class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
-  (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
-  (Instr $value, $srsrc, $vaddr, $offset)
->;
-
-let Predicates = [isSICI] in {
-def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
-} // End Predicates = [isSICI]
-
-*/
-
 //===----------------------------------------------------------------------===//
 // MTBUF Patterns
 //===----------------------------------------------------------------------===//