[AMDGPU] Disassembler: Support for all VOP1 instructions.

Support all instructions with VOP1 encoding with 32- or 64-bit operands for the VI subtarget: VGPR_32 and VReg_64 operand register classes; VS_32 and VS_64 operand register classes with inline and literal constants. Tests for VOP1 instructions are included. Patch by: skolton. Reviewers: arsenm, tstellarAMD. Review: http://reviews.llvm.org/D17194 (llvm-svn: 261878)
2024-10-19 11:02:59 +02:00 · 2016-02-25 16:09:14 +00:00 · 2016-02-25 16:09:14 +00:00 · b57a0b698b
commit b57a0b698b
parent 4ad8d0c9b8
4 changed files with 495 additions and 65 deletions
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@ -39,67 +39,68 @@ typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;

 static DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t Addr, const void *Decoder) {
-  const AMDGPUDisassembler *Dis = 
+  const AMDGPUDisassembler *Dis =
    static_cast<const AMDGPUDisassembler *>(Decoder);
  return Dis->DecodeVGPR_32RegisterClass(Inst, Imm, Addr);
 }

 static DecodeStatus DecodeVS_32RegisterClass(MCInst &Inst, unsigned Imm,
                                             uint64_t Addr, const void *Decoder) {
-  const AMDGPUDisassembler *Dis = 
+  const AMDGPUDisassembler *Dis =
    static_cast<const AMDGPUDisassembler *>(Decoder);
  return Dis->DecodeVS_32RegisterClass(Inst, Imm, Addr);
 }

-static DecodeStatus DecodeVS_64RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeVS_64RegisterClass(MCInst &Inst, unsigned Imm,
                                             uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
+  const AMDGPUDisassembler *Dis =
+    static_cast<const AMDGPUDisassembler *>(Decoder);
+  return Dis->DecodeVS_64RegisterClass(Inst, Imm, Addr);
 }

-static DecodeStatus DecodeVReg_64RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeVReg_64RegisterClass(MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr, const void *Decoder) {
+  const AMDGPUDisassembler *Dis =
+    static_cast<const AMDGPUDisassembler *>(Decoder);
+  return Dis->DecodeVReg_64RegisterClass(Inst, Imm, Addr);
+}
+
+static DecodeStatus DecodeVReg_96RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t Addr, const void *Decoder) {
  // ToDo
  return MCDisassembler::Fail;
 }

-static DecodeStatus DecodeVReg_96RegisterClass(MCInst &Inst, unsigned Imm, 
-                                               uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeVReg_128RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeVReg_128RegisterClass(MCInst &Inst, unsigned Imm,
                                                uint64_t Addr, const void *Decoder) {
  // ToDo
  return MCDisassembler::Fail;
 }

-static DecodeStatus DecodeSReg_32RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeSReg_32RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t Addr, const void *Decoder) {
  // ToDo
  return MCDisassembler::Fail;
 }

-static DecodeStatus DecodeSReg_64RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeSReg_64RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t Addr, const void *Decoder) {
  // ToDo
  return MCDisassembler::Fail;
 }

-static DecodeStatus DecodeSReg_128RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeSReg_128RegisterClass(MCInst &Inst, unsigned Imm,
                                                uint64_t Addr, const void *Decoder) {
  // ToDo
  return MCDisassembler::Fail;
 }

-static DecodeStatus DecodeSReg_256RegisterClass(MCInst &Inst, unsigned Imm, 
+static DecodeStatus DecodeSReg_256RegisterClass(MCInst &Inst, unsigned Imm,
                                                uint64_t Addr, const void *Decoder) {
  // ToDo
  return MCDisassembler::Fail;
 }

-
 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
 #undef GET_SUBTARGETINFO_ENUM
@ -112,7 +113,7 @@ static DecodeStatus DecodeSReg_256RegisterClass(MCInst &Inst, unsigned Imm,

 DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes,
-                                                uint64_t Address, 
+                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
@ -120,6 +121,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  assert(AMDGPU::isVI(STI) && "Can disassemble only VI ISA.");

+  HasLiteral = false;
+  this->Bytes = Bytes;
+
  // Try decode 32-bit instruction
  if (Bytes.size() < 4) {
    Size = 0;
@ -135,14 +139,19 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
      Size = 0;
      return MCDisassembler::Fail;
  }
-  Size = 4;
+  if (HasLiteral == true) {
+    Size = 8;
+    HasLiteral = false;
+  } else {
+    Size = 4;
+  }

  return MCDisassembler::Success;
 }

-DecodeStatus AMDGPUDisassembler::DecodeLitFloat(unsigned Imm, uint32_t& F) const {
+DecodeStatus AMDGPUDisassembler::DecodeImmedFloat(unsigned Imm, uint32_t &F) const {
  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
-  // ToDo: AMDGPUInstPrinter does not support 1/(2*PI). It consider 1/(2*PI) as 
+  // ToDo: AMDGPUInstPrinter does not support 1/(2*PI). It consider 1/(2*PI) as
  // literal constant.
  switch(Imm) {
  case 240: F = FloatToBits(0.5f); return MCDisassembler::Success;
@ -158,8 +167,23 @@ DecodeStatus AMDGPUDisassembler::DecodeLitFloat(unsigned Imm, uint32_t& F) const
  }
 }

-DecodeStatus AMDGPUDisassembler::DecodeLitInteger(unsigned Imm, 
-                                                  int64_t& I) const {
+/// Translate an inline-constant SRC encoding in [240, 248] into the bit
+/// pattern of the matching double, written to \p D. Any other encoding yields
+/// MCDisassembler::Fail and leaves \p D untouched.
+DecodeStatus AMDGPUDisassembler::DecodeImmedDouble(unsigned Imm, uint64_t &D) const {
+  // Encodings 240..247 enumerate +/-0.5, +/-1.0, +/-2.0 and +/-4.0 in order.
+  static const double InlineValues[] = {0.5, -0.5, 1.0, -1.0,
+                                        2.0, -2.0, 4.0, -4.0};
+  if (Imm >= 240 && Imm <= 247) {
+    D = DoubleToBits(InlineValues[Imm - 240]);
+    return MCDisassembler::Success;
+  }
+  if (Imm == 248) {
+    D = 0x3fc45f306dc9c882; // 1/(2*PI)
+    return MCDisassembler::Success;
+  }
+  return MCDisassembler::Fail;
+}
+
+DecodeStatus AMDGPUDisassembler::DecodeImmedInteger(unsigned Imm,
+                                                    int64_t &I) const {
  if ((Imm >= 128) && (Imm <= 192)) {
    I = Imm - 128;
    return MCDisassembler::Success;
@ -170,33 +194,91 @@ DecodeStatus AMDGPUDisassembler::DecodeLitInteger(unsigned Imm,
  return MCDisassembler::Fail;
 }

-DecodeStatus AMDGPUDisassembler::DecodeVgprRegister(unsigned Val, 
-                                                    unsigned& RegID) const {
-  if (Val > 255) {
+DecodeStatus AMDGPUDisassembler::DecodeVgprRegister(unsigned Val,
+                                                    unsigned &RegID,
+                                                    unsigned Size) const {
+  if (Val > (256 - Size / 32)) {
    return MCDisassembler::Fail;
  }
-  RegID = AMDGPUMCRegisterClasses[AMDGPU::VGPR_32RegClassID].getRegister(Val);
+  unsigned RegClassID;
+  switch (Size) {
+  case 32: RegClassID = AMDGPU::VGPR_32RegClassID; break;
+  case 64: RegClassID = AMDGPU::VReg_64RegClassID; break;
+  case 96: RegClassID = AMDGPU::VReg_96RegClassID; break;
+  case 128: RegClassID = AMDGPU::VReg_128RegClassID; break;
+  case 256: RegClassID = AMDGPU::VReg_256RegClassID; break;
+  case 512: RegClassID = AMDGPU::VReg_512RegClassID; break;
+  default:
+    return MCDisassembler::Fail;
+  }
+
+  RegID = AMDGPUMCRegisterClasses[RegClassID].getRegister(Val);
  return MCDisassembler::Success;
 }

-DecodeStatus AMDGPUDisassembler::DecodeSgprRegister(unsigned Val, 
-                                                    unsigned& RegID) const {
+DecodeStatus AMDGPUDisassembler::DecodeSgprRegister(unsigned Val,
+                                                    unsigned &RegID,
+                                                    unsigned Size) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
-  if (Val > 101) {
+  unsigned RegClassID;
+
+  switch (Size) {
+  case 32:
+    if (Val > 101) {
+      return MCDisassembler::Fail;
+    }
+    RegClassID = AMDGPU::SGPR_32RegClassID;
+    break;
+  case 64:
+    if ((Val % 2 != 0) || (Val > 100)) {
+      return MCDisassembler::Fail;
+    }
+    Val /= 2;
+    RegClassID = AMDGPU::SGPR_64RegClassID;
+    break;
+  case 128:
+    // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
+    // this bundle?
+    if ((Val % 4 != 0) || (Val > 96)) {
+      return MCDisassembler::Fail;
+    }
+    Val /= 4;
+    RegClassID = AMDGPU::SReg_128RegClassID;
+    break;
+  case 256:
+    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
+    // this bundle?
+    if ((Val % 4 != 0) || (Val > 92)) {
+      return MCDisassembler::Fail;
+    }
+    Val /= 4;
+    RegClassID = AMDGPU::SReg_256RegClassID;
+    break;
+  case 512:
+    // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
+    // this bundle?
+    if ((Val % 4 != 0) || (Val > 84)) {
+      return MCDisassembler::Fail;
+    }
+    Val /= 4;
+    RegClassID = AMDGPU::SReg_512RegClassID;
+    break;
+  default:
    return MCDisassembler::Fail;
  }
-  RegID = AMDGPUMCRegisterClasses[AMDGPU::SGPR_32RegClassID].getRegister(Val);
+
+  RegID = AMDGPUMCRegisterClasses[RegClassID].getRegister(Val);
  return MCDisassembler::Success;
 }

-DecodeStatus AMDGPUDisassembler::DecodeSrcRegister(unsigned Val, 
-                                                   unsigned& RegID) const {
-  // ToDo: deal with out-of range registers  
+DecodeStatus AMDGPUDisassembler::DecodeSrc32Register(unsigned Val,
+                                                     unsigned &RegID) const {
+  // ToDo: deal with out-of range registers
  using namespace AMDGPU;
  if (Val <= 101) {
-    return DecodeSgprRegister(Val, RegID);
+    return DecodeSgprRegister(Val, RegID, 32);
  } else if ((Val >= 256) && (Val <= 511)) {
-    return DecodeVgprRegister(Val - 256, RegID);
+    return DecodeVgprRegister(Val - 256, RegID, 32);
  } else {
    switch(Val) {
    case 102: RegID = getMCReg(FLAT_SCR_LO, STI); return MCDisassembler::Success;
@ -239,8 +321,40 @@ DecodeStatus AMDGPUDisassembler::DecodeSrcRegister(unsigned Val,
  return MCDisassembler::Fail;
 }

-DecodeStatus AMDGPUDisassembler::DecodeVGPR_32RegisterClass(llvm::MCInst &Inst, 
-                                                            unsigned Imm, 
+/// Resolve a 64-bit SRC field encoding \p Val to a register id in \p RegID.
+/// Returns MCDisassembler::Fail for any encoding not handled below.
+DecodeStatus AMDGPUDisassembler::DecodeSrc64Register(unsigned Val,
+                                                     unsigned &RegID) const {
+  // ToDo: deal with out-of range registers
+  using namespace AMDGPU;
+
+  // Encodings 0..101 are forwarded to the SGPR decoder with a 64-bit size.
+  if (Val <= 101)
+    return DecodeSgprRegister(Val, RegID, 64);
+
+  // Encodings 256..511 are rebased to zero and forwarded to the VGPR decoder.
+  if (Val >= 256 && Val <= 511)
+    return DecodeVgprRegister(Val - 256, RegID, 64);
+
+  // The remaining supported encodings name individual 64-bit registers.
+  switch (Val) {
+  case 102: RegID = getMCReg(FLAT_SCR, STI); break;
+  case 106: RegID = getMCReg(VCC, STI); break;
+  case 126: RegID = getMCReg(EXEC, STI); break;
+  default: return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
+
+/// Read the 32-bit literal constant that follows the 4-byte instruction word.
+///
+/// The literal is assembled little-endian from Bytes[4..7] and stored,
+/// zero-extended, in \p Literal. \p Inst is currently unused.
+///
+/// \returns MCDisassembler::Fail if fewer than 8 bytes are available,
+/// MCDisassembler::Success otherwise.
+DecodeStatus AMDGPUDisassembler::DecodeLiteralConstant(MCInst &Inst,
+                                                       uint64_t &Literal) const {
+  // For now all literal constants are supposed to be unsigned integer
+  // ToDo: deal with signed/unsigned 64-bit integer constants
+  // ToDo: deal with float/double constants
+  if (Bytes.size() < 8) {
+    return MCDisassembler::Fail;
+  }
+  // Widen every byte to uint32_t before shifting. A uint8_t operand would be
+  // promoted to (signed) int, so a top byte >= 0x80 shifted by 24 would produce
+  // a negative value that sign-extends into the upper half of the 64-bit
+  // literal (e.g. 0x80000000 would decode as 0xFFFFFFFF80000000).
+  const uint32_t Value = (static_cast<uint32_t>(Bytes[7]) << 24) |
+                         (static_cast<uint32_t>(Bytes[6]) << 16) |
+                         (static_cast<uint32_t>(Bytes[5]) << 8) |
+                         (static_cast<uint32_t>(Bytes[4]) << 0);
+  Literal = Value;
+  return MCDisassembler::Success;
+}
+
+DecodeStatus AMDGPUDisassembler::DecodeVGPR_32RegisterClass(llvm::MCInst &Inst,
+                                                            unsigned Imm,
                                                            uint64_t Addr) const {
  unsigned RegID;
  if (DecodeVgprRegister(Imm, RegID) == MCDisassembler::Success) {
@ -250,21 +364,30 @@ DecodeStatus AMDGPUDisassembler::DecodeVGPR_32RegisterClass(llvm::MCInst &Inst,
  return MCDisassembler::Fail;
 }

-DecodeStatus AMDGPUDisassembler::DecodeVS_32RegisterClass(MCInst &Inst, 
-                                                          unsigned Imm, 
-                                                          uint64_t Addr) const {
-  // ToDo: different opcodes allow different formats og this operands
+DecodeStatus AMDGPUDisassembler::DecodeVSRegisterClass(MCInst &Inst,
+                                                       unsigned Imm,
+                                                       uint64_t Addr,
+                                                       bool Is32) const {
+  // ToDo: different opcodes allow different formats of this operands
  if ((Imm >= 128) && (Imm <= 208)) {
    // immediate integer
    int64_t Val;
-    if (DecodeLitInteger(Imm, Val) == MCDisassembler::Success) {
+    if (DecodeImmedInteger(Imm, Val) == MCDisassembler::Success) {
      Inst.addOperand(MCOperand::createImm(Val));
      return MCDisassembler::Success;
    }
  } else if ((Imm >= 240) && (Imm <= 248)) {
-    // immediate float
-    uint32_t Val;
-    if (DecodeLitFloat(Imm, Val) == MCDisassembler::Success) {
+    // immediate float/double
+    uint64_t Val;
+    DecodeStatus status;
+    if (Is32) {
+      uint32_t Val32;
+      status = DecodeImmedFloat(Imm, Val32);
+      Val = static_cast<uint64_t>(Val32);
+    } else {
+      status = DecodeImmedDouble(Imm, Val);
+    }
+    if (status == MCDisassembler::Success) {
      Inst.addOperand(MCOperand::createImm(Val));
      return MCDisassembler::Success;
    }
@ -273,17 +396,26 @@ DecodeStatus AMDGPUDisassembler::DecodeVS_32RegisterClass(MCInst &Inst,
    // ToDo: implement LDS direct read
  } else if (Imm == 255) {
    // literal constant
-  } else if ((Imm == 125) || 
-             ((Imm >= 209) && (Imm <= 239)) || 
-             (Imm == 249) || 
-             (Imm == 250) || 
+    HasLiteral = true;
+    uint64_t Literal;
+    if (DecodeLiteralConstant(Inst, Literal) == MCDisassembler::Success) {
+      Inst.addOperand(MCOperand::createImm(Literal));
+      return MCDisassembler::Success;
+    }
+    return MCDisassembler::Fail;
+  } else if ((Imm == 125) ||
+             ((Imm >= 209) && (Imm <= 239)) ||
+             (Imm == 249) ||
+             (Imm == 250) ||
             (Imm >= 512)) {
    // reserved
    return MCDisassembler::Fail;
  } else {
    // register
    unsigned RegID;
-    if (DecodeSrcRegister(Imm, RegID) == MCDisassembler::Success) {
+    DecodeStatus status = Is32 ? DecodeSrc32Register(Imm, RegID)
+                               : DecodeSrc64Register(Imm, RegID);
+    if (status == MCDisassembler::Success) {
      Inst.addOperand(MCOperand::createReg(RegID));
      return MCDisassembler::Success;
    }
@ -291,6 +423,31 @@ DecodeStatus AMDGPUDisassembler::DecodeVS_32RegisterClass(MCInst &Inst,
  return MCDisassembler::Fail;
 }

+/// Decode a VS_32 operand by delegating to DecodeVSRegisterClass in 32-bit
+/// mode; the delegate's status is returned unchanged.
+DecodeStatus AMDGPUDisassembler::DecodeVS_32RegisterClass(MCInst &Inst,
+                                                          unsigned Imm,
+                                                          uint64_t Addr) const {
+  const bool Is32 = true;
+  return DecodeVSRegisterClass(Inst, Imm, Addr, Is32);
+}
+
+/// Decode a VS_64 operand by delegating to DecodeVSRegisterClass in 64-bit
+/// mode; the delegate's status is returned unchanged.
+DecodeStatus AMDGPUDisassembler::DecodeVS_64RegisterClass(MCInst &Inst,
+                                                          unsigned Imm,
+                                                          uint64_t Addr) const {
+  const bool Is32 = false;
+  return DecodeVSRegisterClass(Inst, Imm, Addr, Is32);
+}
+
+/// Decode a VReg_64 operand: ask DecodeVgprRegister for the 64-bit register
+/// selected by \p Imm and, on success, append it to \p Inst as a register
+/// operand. Any non-success status from the lookup is reported as Fail.
+DecodeStatus AMDGPUDisassembler::DecodeVReg_64RegisterClass(llvm::MCInst &Inst,
+                                                            unsigned Imm,
+                                                            uint64_t Addr) const {
+  unsigned Reg;
+  const DecodeStatus Status = DecodeVgprRegister(Imm, Reg, 64);
+  if (Status != MCDisassembler::Success)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+
+
 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@ -25,9 +25,14 @@ namespace llvm {
  class MCSubtargetInfo;

  class AMDGPUDisassembler : public MCDisassembler {
+  private:
+    /// true if 32-bit literal constant is placed after instruction
+    mutable bool HasLiteral;
+    mutable ArrayRef<uint8_t> Bytes;
+
  public:
    AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
-      MCDisassembler(STI, Ctx) {}
+      MCDisassembler(STI, Ctx), HasLiteral(false) {}

    ~AMDGPUDisassembler() {}

@ -35,23 +40,41 @@ namespace llvm {
                                ArrayRef<uint8_t> Bytes, uint64_t Address,
                                raw_ostream &WS, raw_ostream &CS) const override;

-    /// Decode inline float value in VSrc field
-    DecodeStatus DecodeLitFloat(unsigned Imm, uint32_t& F) const;
-    /// Decode inline integer value in VSrc field
-    DecodeStatus DecodeLitInteger(unsigned Imm, int64_t& I) const;
+    /// Decode inline float value in SRC field
+    DecodeStatus DecodeImmedFloat(unsigned Imm, uint32_t &F) const;
+    /// Decode inline double value in SRC field
+    DecodeStatus DecodeImmedDouble(unsigned Imm, uint64_t &D) const;
+    /// Decode inline integer value in SRC field
+    DecodeStatus DecodeImmedInteger(unsigned Imm, int64_t &I) const;
    /// Decode VGPR register
-    DecodeStatus DecodeVgprRegister(unsigned Val, unsigned& RegID) const;
+    DecodeStatus DecodeVgprRegister(unsigned Val, unsigned &RegID,
+                                    unsigned Size = 32) const;
    /// Decode SGPR register
-    DecodeStatus DecodeSgprRegister(unsigned Val, unsigned& RegID) const;
-    /// Decode register in VSrc field
-    DecodeStatus DecodeSrcRegister(unsigned Val, unsigned& RegID) const;
+    DecodeStatus DecodeSgprRegister(unsigned Val, unsigned &RegID,
+                                    unsigned Size = 32) const;
+    /// Decode 32-bit register in SRC field
+    DecodeStatus DecodeSrc32Register(unsigned Val, unsigned &RegID) const;
+    /// Decode 64-bit register in SRC field
+    DecodeStatus DecodeSrc64Register(unsigned Val, unsigned &RegID) const;

-    DecodeStatus DecodeVS_32RegisterClass(MCInst &Inst, unsigned Imm, 
+    /// Decode literal constant after instruction
+    DecodeStatus DecodeLiteralConstant(MCInst &Inst, uint64_t &Literal) const;
+
+    DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm,
+                                            uint64_t Addr) const;
+
+    DecodeStatus DecodeVSRegisterClass(MCInst &Inst, unsigned Imm,
+                                       uint64_t Addr, bool Is32) const;
+
+    DecodeStatus DecodeVS_32RegisterClass(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr) const;

-    DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm, 
+    DecodeStatus DecodeVS_64RegisterClass(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr) const;
+
+    DecodeStatus DecodeVReg_64RegisterClass(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr) const;
  };
 } // namespace llvm

-#endif //LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
+#endif // LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@ -1666,7 +1666,7 @@ class VOP3_C_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
 }

 class VOP3_C_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
-                     bit HasMods = 0, bit VOP3Only = 0> :
+                      bit HasMods = 0, bit VOP3Only = 0> :
  VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
  VOP3ce_vi <op>,
  SIMCInstr <opName#"_e64", SISubtarget.VI> {
--- a/test/MC/Disassembler/AMDGPU/vop1.txt
+++ b/test/MC/Disassembler/AMDGPU/vop1.txt
@ -0,0 +1,250 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s
+
+# CHECK: v_nop              ; encoding: [0x00,0x00,0x00,0x7e]
+0x00 0x00 0x00 0x7e
+
+# CHECK: v_clrexcp                       ; encoding: [0x00,0x6a,0x00,0x7e]
+0x00 0x6a 0x00 0x7e
+
+# CHECK: v_mov_b32_e32 v2, v1         ; encoding: [0x01,0x03,0x04,0x7e]
+0x01 0x03 0x04 0x7e
+
+# CHECK: v_mov_b32_e32 v1, 0.5         ; encoding: [0xf0,0x02,0x02,0x7e]
+0xf0 0x02 0x02 0x7e
+
+# CHECK: v_mov_b32_e32 v15, s100         ; encoding: [0x64,0x02,0x1e,0x7e]
+0x64 0x02 0x1e 0x7e
+
+# CHECK: v_mov_b32_e32 v90, flat_scratch_lo         ; encoding: [0x66,0x02,0xb4,0x7e]
+0x66 0x02 0xb4 0x7e
+
+# CHECK: v_mov_b32_e32 v150, vcc_lo         ; encoding: [0x6a,0x02,0x2c,0x7f]
+0x6a 0x02 0x2c 0x7f
+
+# CHECK: v_mov_b32_e32 v199, exec_lo         ; encoding: [0x7e,0x02,0x8e,0x7f]
+0x7e 0x02 0x8e 0x7f
+
+# CHECK: v_mov_b32_e32 v222, m0         ; encoding: [0x7c,0x02,0xbc,0x7f]
+0x7c 0x02 0xbc 0x7f
+
+# CHECK: v_mov_b32_e32 v255, -13         ; encoding: [0xcd,0x02,0xfe,0x7f]
+0xcd 0x02 0xfe 0x7f
+
+# CHECK: v_cvt_f32_i32_e32 v153, s98         ; encoding: [0x62,0x0a,0x32,0x7f]
+0x62 0x0a 0x32 0x7f
+
+# CHECK: v_cvt_f32_u32_e32 v33, -4.0         ; encoding: [0xf7,0x0c,0x42,0x7e]
+0xf7 0x0c 0x42 0x7e
+
+# CHECK: v_cvt_i32_f64_e32 v2, s[0:1]         ; encoding: [0x00,0x06,0x04,0x7e]
+0x00 0x06 0x04 0x7e
+
+# CHECK: v_cvt_u32_f32_e32 v123, vcc_hi  ; encoding: [0x6b,0x0e,0xf6,0x7e]
+0x6b 0x0e 0xf6 0x7e
+
+# CHECK: v_cvt_i32_f32_e32 v123, flat_scratch_lo ; encoding: [0x66,0x10,0xf6,0x7e]
+0x66 0x10 0xf6 0x7e
+
+# CHECK: v_cvt_rpi_i32_f32_e32 v123, s101 ; encoding: [0x65,0x18,0xf6,0x7e]
+0x65 0x18 0xf6 0x7e
+
+# CHECK: v_cvt_flr_i32_f32_e32 v123, -4.0 ; encoding: [0xf7,0x1a,0xf6,0x7e]
+0xf7 0x1a 0xf6 0x7e
+
+# CHECK: v_cvt_f32_f64_e32 v123, vcc      ; encoding: [0x6a,0x1e,0xf6,0x7e]
+0x6a 0x1e 0xf6 0x7e
+
+# CHECK: v_cvt_u32_f64_e32 v123, exec      ; encoding: [0x7e,0x2a,0xf6,0x7e]
+0x7e 0x2a 0xf6 0x7e
+
+# CHECK: v_fract_f32_e32 v123, m0        ; encoding: [0x7c,0x36,0xf6,0x7e]
+0x7c 0x36 0xf6 0x7e
+
+# CHECK: v_trunc_f32_e32 v123, exec_lo   ; encoding: [0x7e,0x38,0xf6,0x7e]
+0x7e 0x38 0xf6 0x7e
+
+# CHECK: v_ceil_f32_e32 v123, exec_hi    ; encoding: [0x7f,0x3a,0xf6,0x7e]
+0x7f 0x3a 0xf6 0x7e
+
+# CHECK: v_rndne_f32_e32 v123, 0         ; encoding: [0x80,0x3c,0xf6,0x7e]
+0x80 0x3c 0xf6 0x7e
+
+# CHECK: v_floor_f32_e32 v123, -0.5      ; encoding: [0xf1,0x3e,0xf6,0x7e]
+0xf1 0x3e 0xf6 0x7e
+
+# CHECK: v_exp_f32_e32 v123, 1.0         ; encoding: [0xf2,0x40,0xf6,0x7e]
+0xf2 0x40 0xf6 0x7e
+
+# CHECK: v_log_f32_e32 v123, -1.0        ; encoding: [0xf3,0x42,0xf6,0x7e]
+0xf3 0x42 0xf6 0x7e
+
+# CHECK: v_rcp_f32_e32 v123, 2.0         ; encoding: [0xf4,0x44,0xf6,0x7e]
+0xf4 0x44 0xf6 0x7e
+
+# CHECK: v_rcp_iflag_f32_e32 v123, -2.0  ; encoding: [0xf5,0x46,0xf6,0x7e]
+0xf5 0x46 0xf6 0x7e
+
+# CHECK: v_rsq_f32_e32 v123, 4.0         ; encoding: [0xf6,0x48,0xf6,0x7e]
+0xf6 0x48 0xf6 0x7e
+
+# CHECK: v_sqrt_f32_e32 v123, v12        ; encoding: [0x0c,0x4f,0xf6,0x7e]
+0x0c 0x4f 0xf6 0x7e
+
+# CHECK: v_sin_f32_e32 v123, v12         ; encoding: [0x0c,0x53,0xf6,0x7e]
+0x0c 0x53 0xf6 0x7e
+
+# CHECK: v_cos_f32_e32 v123, v12         ; encoding: [0x0c,0x55,0xf6,0x7e]
+0x0c 0x55 0xf6 0x7e
+
+# CHECK: v_not_b32_e32 v123, v12         ; encoding: [0x0c,0x57,0xf6,0x7e]
+0x0c 0x57 0xf6 0x7e
+
+# CHECK: v_bfrev_b32_e32 v123, v12       ; encoding: [0x0c,0x59,0xf6,0x7e]
+0x0c 0x59 0xf6 0x7e
+
+# CHECK: v_ffbh_u32_e32 v123, v12        ; encoding: [0x0c,0x5b,0xf6,0x7e]
+0x0c 0x5b 0xf6 0x7e
+
+# CHECK: v_ffbl_b32_e32 v123, v12        ; encoding: [0x0c,0x5d,0xf6,0x7e]
+0x0c 0x5d 0xf6 0x7e
+
+# CHECK: v_ffbh_i32_e32 v123, v12        ; encoding: [0x0c,0x5f,0xf6,0x7e]
+0x0c 0x5f 0xf6 0x7e
+
+# CHECK: v_frexp_exp_i32_f64_e32 v123, 2.0 ; encoding: [0xf4,0x60,0xf6,0x7e]
+0xf4 0x60 0xf6 0x7e
+
+# CHECK: v_frexp_exp_i32_f32_e32 v123, s33 ; encoding: [0x21,0x66,0xf6,0x7e]
+0x21 0x66 0xf6 0x7e
+
+# CHECK: v_frexp_mant_f32_e32 v123, s33  ; encoding: [0x21,0x68,0xf6,0x7e]
+0x21 0x68 0xf6 0x7e
+
+# CHECK: v_movreld_b32_e32 v123, s33     ; encoding: [0x21,0x6c,0xf6,0x7e]
+0x21 0x6c 0xf6 0x7e
+
+# CHECK: v_movrels_b32_e32 v123, s33     ; encoding: [0x21,0x6e,0xf6,0x7e]
+0x21 0x6e 0xf6 0x7e
+
+# CHECK: v_movrelsd_b32_e32 v123, s33    ; encoding: [0x21,0x70,0xf6,0x7e]
+0x21 0x70 0xf6 0x7e
+
+# CHECK: v_cvt_f16_f32_e32 v123, flat_scratch_hi ; encoding: [0x67,0x14,0xf6,0x7e]
+0x67 0x14 0xf6 0x7e
+
+# CHECK: v_cvt_f32_f16_e32 v123, s55     ; encoding: [0x37,0x16,0xf6,0x7e]
+0x37 0x16 0xf6 0x7e
+
+# CHECK: v_cvt_off_f32_i4_e32 v123, v12  ; encoding: [0x0c,0x1d,0xf6,0x7e]
+0x0c 0x1d 0xf6 0x7e
+
+# CHECK: v_cvt_f32_ubyte0_e32 v123, v12  ; encoding: [0x0c,0x23,0xf6,0x7e]
+0x0c 0x23 0xf6 0x7e
+
+# CHECK: v_cvt_f32_ubyte1_e32 v123, v12  ; encoding: [0x0c,0x25,0xf6,0x7e]
+0x0c 0x25 0xf6 0x7e
+
+# CHECK: v_cvt_f32_ubyte2_e32 v123, v12  ; encoding: [0x0c,0x27,0xf6,0x7e]
+0x0c 0x27 0xf6 0x7e
+
+# CHECK: v_cvt_f32_ubyte3_e32 v123, v12  ; encoding: [0x0c,0x29,0xf6,0x7e]
+0x0c 0x29 0xf6 0x7e
+
+# CHECK: v_cvt_f64_i32_e32 v[222:223], 1.0 ; encoding: [0xf2,0x08,0xbc,0x7f]
+0xf2 0x08 0xbc 0x7f
+
+# CHECK: v_cvt_f64_i32_e32 v[222:223], exec_hi ; encoding: [0x7f,0x08,0xbc,0x7f]
+0x7f 0x08 0xbc 0x7f
+
+# CHECK: v_cvt_f64_f32_e32 v[222:223], s33 ; encoding: [0x21,0x20,0xbc,0x7f]
+0x21 0x20 0xbc 0x7f
+
+# CHECK: v_cvt_f64_u32_e32 v[222:223], s33 ; encoding: [0x21,0x2c,0xbc,0x7f]
+0x21 0x2c 0xbc 0x7f
+
+# CHECK: v_rcp_f64_e32 v[222:223], s[22:23] ; encoding: [0x16,0x4a,0xbc,0x7f]
+0x16 0x4a 0xbc 0x7f
+
+# CHECK: v_rsq_f64_e32 v[222:223], s[22:23] ; encoding: [0x16,0x4c,0xbc,0x7f]
+0x16 0x4c 0xbc 0x7f
+
+# CHECK: v_sqrt_f64_e32 v[222:223], s[22:23] ; encoding: [0x16,0x50,0xbc,0x7f]
+0x16 0x50 0xbc 0x7f
+
+# CHECK: v_frexp_mant_f64_e32 v[222:223], s[22:23] ; encoding: [0x16,0x62,0xbc,0x7f]
+0x16 0x62 0xbc 0x7f
+
+# CHECK: v_fract_f64_e32 v[222:223], s[22:23] ; encoding: [0x16,0x64,0xbc,0x7f]
+0x16 0x64 0xbc 0x7f
+
+# CHECK: v_cvt_f16_u16_e32 v123, 23      ; encoding: [0x97,0x72,0xf6,0x7e]
+0x97 0x72 0xf6 0x7e
+
+# CHECK: v_cvt_f16_i16_e32 v123, vcc_hi  ; encoding: [0x6b,0x74,0xf6,0x7e]
+0x6b 0x74 0xf6 0x7e
+
+# CHECK: v_cvt_u16_f16_e32 v123, m0      ; encoding: [0x7c,0x76,0xf6,0x7e]
+0x7c 0x76 0xf6 0x7e
+
+# CHECK: v_cvt_i16_f16_e32 v123, exec_lo ; encoding: [0x7e,0x78,0xf6,0x7e]
+0x7e 0x78 0xf6 0x7e
+
+# CHECK: v_rcp_f16_e32 v123, 1.0         ; encoding: [0xf2,0x7a,0xf6,0x7e]
+0xf2 0x7a 0xf6 0x7e
+
+# CHECK: v_sqrt_f16_e32 v123, 4.0        ; encoding: [0xf6,0x7c,0xf6,0x7e]
+0xf6 0x7c 0xf6 0x7e
+
+# CHECK: v_rsq_f16_e32 v123, -1.0        ; encoding: [0xf3,0x7e,0xf6,0x7e]
+0xf3 0x7e 0xf6 0x7e
+
+# CHECK: v_log_f16_e32 v123, s33         ; encoding: [0x21,0x80,0xf6,0x7e]
+0x21 0x80 0xf6 0x7e
+
+# CHECK: v_exp_f16_e32 v123, v12         ; encoding: [0x0c,0x83,0xf6,0x7e]
+0x0c 0x83 0xf6 0x7e
+
+# CHECK: v_frexp_mant_f16_e32 v123, v12  ; encoding: [0x0c,0x85,0xf6,0x7e]
+0x0c 0x85 0xf6 0x7e
+
+# CHECK: v_frexp_exp_i16_f16_e32 v123, v12 ; encoding: [0x0c,0x87,0xf6,0x7e]
+0x0c 0x87 0xf6 0x7e
+
+# CHECK: v_floor_f16_e32 v123, v12       ; encoding: [0x0c,0x89,0xf6,0x7e]
+0x0c 0x89 0xf6 0x7e
+
+# CHECK: v_ceil_f16_e32 v123, v12        ; encoding: [0x0c,0x8b,0xf6,0x7e]
+0x0c 0x8b 0xf6 0x7e
+
+# CHECK: v_trunc_f16_e32 v123, s33       ; encoding: [0x21,0x8c,0xf6,0x7e]
+0x21 0x8c 0xf6 0x7e
+
+# CHECK: v_rndne_f16_e32 v123, s33       ; encoding: [0x21,0x8e,0xf6,0x7e]
+0x21 0x8e 0xf6 0x7e
+
+# CHECK: v_fract_f16_e32 v123, s33       ; encoding: [0x21,0x90,0xf6,0x7e]
+0x21 0x90 0xf6 0x7e
+
+# CHECK: v_sin_f16_e32 v123, s33         ; encoding: [0x21,0x92,0xf6,0x7e]
+0x21 0x92 0xf6 0x7e
+
+# CHECK: v_cos_f16_e32 v123, s33         ; encoding: [0x21,0x94,0xf6,0x7e]
+0x21 0x94 0xf6 0x7e
+
+# CHECK: v_mov_b32_e32 v2, 0x75bcd15     ; encoding: [0xff,0x02,0x04,0x7e,0x15,0xcd,0x5b,0x07]
+0xff 0x02 0x04 0x7e 0x15 0xcd 0x5b 0x07
+
+# CHECK: v_cvt_f32_u32_e32 v33, 0x4236b732 ; encoding: [0xff,0x0c,0x42,0x7e,0x32,0xb7,0x36,0x42]
+0xff 0x0c 0x42 0x7e 0x32 0xb7 0x36 0x42
+
+# CHECK: v_cvt_i32_f64_e32 v2, 0x4236b732 ; encoding: [0xff,0x06,0x04,0x7e,0x32,0xb7,0x36,0x42]
+0xff 0x06 0x04 0x7e 0x32 0xb7 0x36 0x42
+
+# CHECK: v_cvt_f16_u16_e32 v123, 0x3ade68b1 ; encoding: [0xff,0x72,0xf6,0x7e,0xb1,0x68,0xde,0x3a]
+0xff 0x72 0xf6 0x7e 0xb1 0x68 0xde 0x3a
+
+# CHECK: v_cvt_f16_i16_e32 v123, 0x21c2  ; encoding: [0xff,0x74,0xf6,0x7e,0xc2,0x21,0x00,0x00]
+0xff 0x74 0xf6 0x7e 0xc2 0x21 0x00 0x00
+
+# CHECK: v_cvt_u16_f16_e32 v123, 0x3f200000 ; encoding: [0xff,0x76,0xf6,0x7e,0x00,0x00,0x20,0x3f]
+0xff 0x76 0xf6 0x7e 0x00 0x00 0x20 0x3f