[AArch64] v8.3-a complex number support

New instructions are added to AArch32 and AArch64 to aid floating-point multiplication and addition of complex numbers, where the complex numbers are packed in a vector register as a pair of elements. The Imaginary part of the number is placed in the more significant element, and the Real part of the number is placed in the less significant element. Differential Revision: https://reviews.llvm.org/D36792 llvm-svn: 312228
2024-11-23 11:13:28 +01:00 · 2017-08-31 09:27:04 +00:00 · 2017-08-31 09:27:04 +00:00 · 3d30332768
commit 3d30332768
parent 42bd8958bb
7 changed files with 531 additions and 0 deletions
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@ -9391,6 +9391,238 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
 }
 } // let Predicates = [HasNeon, HasRDM]

+//----------------------------------------------------------------------------
+// ARMv8.3 Complex ADD/MLA instructions
+//----------------------------------------------------------------------------
+
+// Assembler operand class for the rotation immediate of the complex-number
+// instructions. The template arguments are pasted into strings:
+//   Angle, Remainder - become the template arguments of the
+//                      isComplexRotation<> predicate named in PredicateMethod.
+//   Type             - suffix appended to the diagnostic type
+//                      ("InvalidComplexRotation" # Type) and to the operand
+//                      class name ("ComplexRotation" # Type).
+class ComplexRotationOperand<int Angle, int Remainder, string Type>
+  : AsmOperandClass {
+  let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">";
+  let DiagnosticType = "InvalidComplexRotation" # Type;
+  let Name = "ComplexRotation" # Type;
+}
+// Rotation operand using the "Even" operand class with Angle = 90 and
+// Remainder = 0. It is printed through printComplexRotationOp<90, 0>.
+def complexrotateop : Operand<i32> {
+  let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
+  let PrintMethod = "printComplexRotationOp<90, 0>";
+}
+// Rotation operand using the "Odd" operand class with Angle = 180 and
+// Remainder = 90. It is printed through printComplexRotationOp<180, 90>.
+def complexrotateopodd : Operand<i32> {
+  let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
+  let PrintMethod = "printComplexRotationOp<180, 90>";
+}
+
+// Encoding for the non-tied vector form of the complex instructions: $Rd is
+// purely an output and the rotation occupies a single bit (Inst{12}).
+//   Q, U, size, opcode - fixed encoding fields placed in Inst{30}, Inst{29},
+//                        Inst{23-22} and Inst{15-13} respectively.
+//   regtype            - register operand class shared by $Rd, $Rn and $Rm.
+//   rottype            - operand class of the rotation immediate $rot.
+//   asm, kind          - mnemonic and arrangement suffix used to build the
+//                        two alternatives of the assembly string.
+//   pattern            - selection pattern list forwarded to I<>.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
+                                     RegisterOperand regtype, Operand rottype,
+                                     string asm, string kind, list<dag> pattern>
+  : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
+      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
+      "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  // Single-bit rotation field; see the note at Inst{12} below.
+  bits<1> rot;
+  let Inst{31}    = 0;
+  let Inst{30}    = Q;
+  let Inst{29}    = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0;
+  let Inst{20-16} = Rm;
+  let Inst{15-13} = opcode;
+  // Non-tied version (FCADD) only has one rotation bit
+  let Inst{12}    = rot;
+  let Inst{11}    = 0;
+  let Inst{10}    = 1;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
+}
+
+// Instantiates the non-tied complex three-same-vector instruction for the
+// .4h/.8h arrangements (additionally predicated on HasFullFP16) and for the
+// .2s/.4s/.2d arrangements.
+//
+// The selection patterns name only operands that
+// BaseSIMDThreeSameVectorComplex declares: $Rd is the sole output and
+// $Rn, $Rm, $rot are the inputs (there is no tied accumulator and no $dst).
+// The rotation is written as a typed leaf, (i32 rottype:$rot), because an
+// Operand record is not a valid DAG operator.
+multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
+                                          string asm, SDPatternOperator OpNode>{
+  let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
+              asm, ".4h",
+              [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn),
+                                             (v4f16 V64:$Rm),
+                                             (i32 rottype:$rot)))]>;
+
+  def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype,
+              asm, ".8h",
+              [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn),
+                                              (v8f16 V128:$Rm),
+                                              (i32 rottype:$rot)))]>;
+  }
+
+  let Predicates = [HasV8_3a, HasNEON] in {
+  def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
+              asm, ".2s",
+              [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn),
+                                             (v2f32 V64:$Rm),
+                                             (i32 rottype:$rot)))]>;
+
+  def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype,
+              asm, ".4s",
+              [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn),
+                                              (v4f32 V128:$Rm),
+                                              (i32 rottype:$rot)))]>;
+
+  def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype,
+              asm, ".2d",
+              [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn),
+                                              (v2f64 V128:$Rm),
+                                              (i32 rottype:$rot)))]>;
+  }
+}
+
+// Encoding for the tied vector form of the complex instructions. $Rd is read
+// as an input and constrained to the output register via "$Rd = $dst", and
+// the rotation occupies two bits (Inst{12-11}).
+//   Q, U, size, opcode - fixed encoding fields placed in Inst{30}, Inst{29},
+//                        Inst{23-22} and Inst{15-13} respectively.
+//   regtype            - register operand class shared by all vector operands.
+//   rottype            - operand class of the rotation immediate $rot.
+//   asm, kind          - mnemonic and arrangement suffix used to build the
+//                        two alternatives of the assembly string.
+//   pattern            - selection pattern list forwarded to I<>.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
+                                         bits<3> opcode,
+                                         RegisterOperand regtype,
+                                         Operand rottype, string asm,
+                                         string kind, list<dag> pattern>
+  : I<(outs regtype:$dst),
+      (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
+      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
+      "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  // Two-bit rotation field, unlike the single bit of the non-tied class.
+  bits<2> rot;
+  let Inst{31}    = 0;
+  let Inst{30}    = Q;
+  let Inst{29}    = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0;
+  let Inst{20-16} = Rm;
+  let Inst{15-13} = opcode;
+  let Inst{12-11} = rot;
+  let Inst{10}    = 1;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
+}
+
+// Instantiates the tied (accumulating) complex three-same-vector instruction
+// for the .4h/.8h arrangements (additionally predicated on HasFullFP16) and
+// for the .2s/.4s/.2d arrangements.
+//
+// In each pattern $Rd is the accumulator input that the base class ties to
+// the $dst output. The rotation is written as a typed leaf,
+// (i32 rottype:$rot), because an Operand record is not a valid DAG operator.
+multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
+                                             Operand rottype, string asm,
+                                             SDPatternOperator OpNode> {
+  let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
+              rottype, asm, ".4h",
+              [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
+                                              (v4f16 V64:$Rn),
+                                              (v4f16 V64:$Rm),
+                                              (i32 rottype:$rot)))]>;
+
+  def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128,
+              rottype, asm, ".8h",
+              [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
+                                               (v8f16 V128:$Rn),
+                                               (v8f16 V128:$Rm),
+                                               (i32 rottype:$rot)))]>;
+  }
+
+  let Predicates = [HasV8_3a, HasNEON] in {
+  def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
+              rottype, asm, ".2s",
+              [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
+                                              (v2f32 V64:$Rn),
+                                              (v2f32 V64:$Rm),
+                                              (i32 rottype:$rot)))]>;
+
+  def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128,
+              rottype, asm, ".4s",
+              [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+                                               (v4f32 V128:$Rn),
+                                               (v4f32 V128:$Rm),
+                                               (i32 rottype:$rot)))]>;
+
+  def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128,
+              rottype, asm, ".2d",
+              [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
+                                               (v2f64 V128:$Rn),
+                                               (v2f64 V128:$Rm),
+                                               (i32 rottype:$rot)))]>;
+  }
+}
+
+// Encoding for the indexed (by-element), tied form of the complex
+// instructions. $Rd is an input constrained to the output via "$Rd = $dst".
+// Inst{21} and Inst{11} are deliberately left unset here; each derived def
+// assigns them.
+//   Q, U, Scalar, size - fixed encoding fields placed in Inst{30}, Inst{29},
+//                        Inst{28} and Inst{23-22} respectively.
+//   opc1, opc2         - opcode bits placed in Inst{15} and Inst{12}; the
+//                        two-bit rotation sits between them in Inst{14-13}.
+//   dst_reg, lhs_reg, rhs_reg - register operand classes of $Rd/$dst, $Rn
+//                        and $Rm.
+//   vec_idx, rottype   - operand classes of the lane index $idx and the
+//                        rotation immediate $rot.
+//   asm                - mnemonic.
+//   apple_kind         - suffix attached to the mnemonic in the second
+//                        assembly-string alternative (presumably the Apple
+//                        syntax variant, going by the name).
+//   dst_kind, lhs_kind, rhs_kind - per-operand suffixes used in the first
+//                        assembly-string alternative.
+//   pattern            - selection pattern list forwarded to I<>.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
+                                 bit opc1, bit opc2, RegisterOperand dst_reg,
+                                 RegisterOperand lhs_reg,
+                                 RegisterOperand rhs_reg, Operand vec_idx,
+                                 Operand rottype, string asm, string apple_kind,
+                                 string dst_kind, string lhs_kind,
+                                 string rhs_kind, list<dag> pattern>
+  : I<(outs dst_reg:$dst),
+      (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot),
+      asm,
+      "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
+      "$idx, $rot" # "|" # apple_kind #
+      "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  bits<2> rot;
+
+  let Inst{31}    = 0;
+  let Inst{30}    = Q;
+  let Inst{29}    = U;
+  let Inst{28}    = Scalar;
+  let Inst{27-24} = 0b1111;
+  let Inst{23-22} = size;
+  // Bit 21 must be set by the derived class.
+  let Inst{20-16} = Rm;
+  let Inst{15}    = opc1;
+  let Inst{14-13} = rot;
+  let Inst{12}    = opc2;
+  // Bit 11 must be set by the derived class.
+  let Inst{10}    = 0;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
+}
+
+// The complex instructions index by pairs of elements, so the VectorIndexes
+// don't match the lane types, and the index bits are different to the other
+// classes.
+//
+// U is forwarded to every variant instead of being hard-coded, so the
+// encoding bit Inst{29} follows the multiclass argument. OpNode is accepted
+// for signature parity with the vector multiclasses but is unused here: all
+// indexed variants are defined with an empty pattern list.
+multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
+                                     string asm, SDPatternOperator OpNode> {
+  let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
+  // .4h: one index bit, carried in Inst{21}; Inst{11} is fixed to 0.
+  def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, U, 0, 0b01, opc1, opc2, V64,
+                      V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
+                      ".4h", ".h", []> {
+    bits<1> idx;
+    let Inst{11} = 0;
+    let Inst{21} = idx{0};
+  }
+
+  // .8h: two index bits, high bit in Inst{11} and low bit in Inst{21}.
+  def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, U, 0, 0b01, opc1, opc2,
+                      V128, V128, V128, VectorIndexS, rottype, asm, ".8h",
+                      ".8h", ".8h", ".h", []> {
+    bits<2> idx;
+    let Inst{11} = idx{1};
+    let Inst{21} = idx{0};
+  }
+  } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
+
+  let Predicates = [HasV8_3a,HasNEON] in {
+  // .4s: one index bit, carried in Inst{11}; Inst{21} is fixed to 0.
+  def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, U, 0, 0b10, opc1, opc2,
+                      V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
+                      ".4s", ".4s", ".s", []> {
+    bits<1> idx;
+    let Inst{11} = idx{0};
+    let Inst{21} = 0;
+  }
+  } // Predicates = [HasV8_3a,HasNEON]
+}
+
 //----------------------------------------------------------------------------
 // Crypto extensions
 //----------------------------------------------------------------------------
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@ -460,6 +460,15 @@ let Predicates = [HasRCPC] in {
  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
 }

+// v8.3a complex add and multiply-accumulate. No predicate here, that is done
+// inside the multiclass as the FP16 versions need different predicates.
+// All three are instantiated with null_frag, i.e. without a selection DAG
+// operator.
+// FCMLA (vector): tied form, rotation operand complexrotateop.
+defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
+                                               "fcmla", null_frag>;
+// FCADD (vector): non-tied form, rotation operand complexrotateopodd.
+defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
+                                           "fcadd", null_frag>;
+// FCMLA (indexed): tied by-element form, rotation operand complexrotateop.
+defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
+                                       null_frag>;
+
 let Predicates = [HasV8_3a] in {
  // v8.3a Pointer Authentication
  let Uses = [LR], Defs = [LR] in {
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@ -835,6 +835,17 @@ public:
      AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum);
  }

+  /// Returns true when this operand is a constant immediate that is at most
+  /// 270 and leaves \p Remainder when divided by \p Angle.
+  template<int64_t Angle, int64_t Remainder>
+  bool isComplexRotation() const {
+    if (!isImm())
+      return false;
+
+    const auto *ConstExpr = dyn_cast<MCConstantExpr>(getImm());
+    if (ConstExpr == nullptr)
+      return false;
+
+    // Held as unsigned: a negative immediate turns into a very large value
+    // and is rejected by the upper bound below.
+    uint64_t Rotation = ConstExpr->getValue();
+    if (Rotation > 270)
+      return false;
+    return Rotation % Angle == Remainder;
+  }
+
  /// Is this a vector list with the type implicit (presumably attached to the
  /// instruction itself)?
  template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
@ -1527,6 +1538,18 @@ public:
    Inst.addOperand(MCOperand::createImm((~Value >> Shift) & 0xffff));
  }

+  /// Appends the rotation to \p Inst as an immediate equal to the parsed
+  /// constant divided by 90.
+  void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const auto *RotExpr = cast<MCConstantExpr>(getImm());
+    const int64_t Degrees = RotExpr->getValue();
+    Inst.addOperand(MCOperand::createImm(Degrees / 90));
+  }
+
+  /// Appends the rotation to \p Inst as an immediate equal to
+  /// (parsed constant - 90) / 180.
+  void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const auto *RotExpr = cast<MCConstantExpr>(getImm());
+    const int64_t Degrees = RotExpr->getValue();
+    Inst.addOperand(MCOperand::createImm((Degrees - 90) / 180));
+  }
+
  void print(raw_ostream &OS) const override;

  static std::unique_ptr<AArch64Operand>
@ -3402,6 +3425,10 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
    return Error(Loc, "expected readable system register");
  case Match_MSR:
    return Error(Loc, "expected writable system register or pstate");
+  case Match_InvalidComplexRotationEven:
+    return Error(Loc, "complex rotation must be 0, 90, 180 or 270.");
+  case Match_InvalidComplexRotationOdd:
+    return Error(Loc, "complex rotation must be 90 or 270.");
  case Match_MnemonicFail: {
    std::string Suggestion = AArch64MnemonicSpellCheck(
        ((AArch64Operand &)*Operands[0]).getToken(),
@ -3802,6 +3829,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
  case Match_InvalidIndexS:
  case Match_InvalidIndexD:
  case Match_InvalidLabel:
+  case Match_InvalidComplexRotationEven:
+  case Match_InvalidComplexRotationOdd:
  case Match_MSR:
  case Match_MRS: {
    if (ErrorInfo >= Operands.size())
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@ -1331,3 +1331,12 @@ void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
  uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
  O << format("#%#016llx", Val);
 }
+
+/// Prints operand \p OpNo of \p MI as "#<n>", where n is the stored
+/// immediate multiplied by \p Angle plus \p Remainder.
+template<int64_t Angle, int64_t Remainder>
+void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const MCOperand &RotOp = MI->getOperand(OpNo);
+  unsigned Encoded = RotOp.getImm();
+  O << "#";
+  O << (Encoded * Angle) + Remainder;
+}
+
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@ -158,6 +158,9 @@ protected:
                              const MCSubtargetInfo &STI, raw_ostream &O);
  void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
                              const MCSubtargetInfo &STI, raw_ostream &O);
+  /// Prints a complex-rotation operand; \p Angle and \p Remainder are the
+  /// multiplier and offset applied to the stored immediate before printing.
+  template<int64_t Angle, int64_t Remainder>
+  void printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
  template<unsigned size>
  void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
                                    const MCSubtargetInfo &STI,
--- a/test/MC/AArch64/armv8.3a-complex.s
+++ b/test/MC/AArch64/armv8.3a-complex.s
@ -0,0 +1,148 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,-fullfp16 < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FP16
+// RUN: FileCheck --check-prefix=STDERR --check-prefix=STDERR-NO-FP16 %s < %t
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,+fullfp16 < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
+// RUN: FileCheck --check-prefix=STDERR --check-prefix=STDERR-FP16 %s < %t
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-v8.3a,+fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=NO-V83A
+
+
+// ==== FCMLA vector ====
+// Types
+  fcmla v0.4h, v1.4h, v2.4h, #0
+// FP16: fcmla   v0.4h, v1.4h, v2.4h, #0 // encoding: [0x20,0xc4,0x42,0x2e]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.8h, v1.8h, v2.8h, #0
+// FP16: fcmla   v0.8h, v1.8h, v2.8h, #0 // encoding: [0x20,0xc4,0x42,0x6e]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2s, v1.2s, v2.2s, #0
+// CHECK: fcmla   v0.2s, v1.2s, v2.2s, #0 // encoding: [0x20,0xc4,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.4s, v1.4s, v2.4s, #0
+// CHECK: fcmla   v0.4s, v1.4s, v2.4s, #0 // encoding: [0x20,0xc4,0x82,0x6e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2d, v1.2d, v2.2d, #0
+// CHECK: fcmla   v0.2d, v1.2d, v2.2d, #0 // encoding: [0x20,0xc4,0xc2,0x6e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+
+// Rotations
+  fcmla v0.2s, v1.2s, v2.2s, #0
+// CHECK: fcmla   v0.2s, v1.2s, v2.2s, #0 // encoding: [0x20,0xc4,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2s, v1.2s, v2.2s, #90
+// CHECK: fcmla   v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xcc,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2s, v1.2s, v2.2s, #180
+// CHECK: fcmla   v0.2s, v1.2s, v2.2s, #180 // encoding: [0x20,0xd4,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2s, v1.2s, v2.2s, #270
+// CHECK: fcmla   v0.2s, v1.2s, v2.2s, #270 // encoding: [0x20,0xdc,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+
+// Invalid rotations
+  fcmla v0.2s, v1.2s, v2.2s, #1
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270.
+  fcmla v0.2s, v1.2s, v2.2s, #360
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270.
+  fcmla v0.2s, v1.2s, v2.2s, #-90
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270.
+
+// ==== FCADD vector ====
+// Types
+  fcadd v0.4h, v1.4h, v2.4h, #90
+// FP16: fcadd   v0.4h, v1.4h, v2.4h, #90 // encoding: [0x20,0xe4,0x42,0x2e]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcadd v0.8h, v1.8h, v2.8h, #90
+// FP16: fcadd   v0.8h, v1.8h, v2.8h, #90 // encoding: [0x20,0xe4,0x42,0x6e]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcadd v0.2s, v1.2s, v2.2s, #90
+// CHECK: fcadd   v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xe4,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcadd v0.4s, v1.4s, v2.4s, #90
+// CHECK: fcadd   v0.4s, v1.4s, v2.4s, #90 // encoding: [0x20,0xe4,0x82,0x6e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcadd v0.2d, v1.2d, v2.2d, #90
+// CHECK: fcadd   v0.2d, v1.2d, v2.2d, #90 // encoding: [0x20,0xe4,0xc2,0x6e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+
+// Rotations
+  fcadd v0.2s, v1.2s, v2.2s, #90
+// CHECK: fcadd   v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xe4,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcadd v0.2s, v1.2s, v2.2s, #270
+// CHECK: fcadd   v0.2s, v1.2s, v2.2s, #270 // encoding: [0x20,0xf4,0x82,0x2e]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+
+// Invalid rotations
+  fcadd v0.2s, v1.2s, v2.2s, #1
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270.
+  fcadd v0.2s, v1.2s, v2.2s, #360
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270.
+  fcadd v0.2s, v1.2s, v2.2s, #-90
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270.
+  fcadd v0.2s, v1.2s, v2.2s, #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270.
+  fcadd v0.2s, v1.2s, v2.2s, #180
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270.
+
+// ==== FCMLA indexed ====
+// Types
+  fcmla v0.4h, v1.4h, v2.h[0], #0
+// FP16: fcmla   v0.4h, v1.4h, v2.h[0], #0 // encoding: [0x20,0x10,0x42,0x2f]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.8h, v1.8h, v2.h[0], #0
+// FP16: fcmla   v0.8h, v1.8h, v2.h[0], #0 // encoding: [0x20,0x10,0x42,0x6f]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2s, v1.2s, v2.s[0], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: invalid operand for instruction
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: invalid operand for instruction
+  fcmla v0.4s, v1.4s, v2.s[0], #0
+// CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #0 // encoding: [0x20,0x10,0x82,0x6f]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.2d, v1.2d, v2.d[0], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: invalid operand for instruction
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: invalid operand for instruction
+
+// Rotations
+  fcmla v0.4s, v1.4s, v2.s[0], #90
+// CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #90 // encoding: [0x20,0x30,0x82,0x6f]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.4s, v1.4s, v2.s[0], #180
+// CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #180 // encoding: [0x20,0x50,0x82,0x6f]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.4s, v1.4s, v2.s[0], #270
+// CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #270 // encoding: [0x20,0x70,0x82,0x6f]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+
+// Valid indices
+  fcmla v0.4h, v1.4h, v2.h[1], #0
+// FP16: fcmla   v0.4h, v1.4h, v2.h[1], #0 // encoding: [0x20,0x10,0x62,0x2f]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.8h, v1.8h, v2.h[3], #0
+// FP16: fcmla   v0.8h, v1.8h, v2.h[3], #0 // encoding: [0x20,0x18,0x62,0x6f]
+// STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+  fcmla v0.4s, v1.4s, v2.s[1], #0
+// CHECK: fcmla   v0.4s, v1.4s, v2.s[1], #0 // encoding: [0x20,0x18,0x82,0x6f]
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+
+// Invalid indices
+  fcmla v0.4h, v1.4h, v2.h[2], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: vector lane must be an integer in range [0, 1].
+  fcmla v0.8h, v1.8h, v2.h[4], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: vector lane must be an integer in range [0, 3].
+  fcmla v0.4s, v1.4s, v2.s[2], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: vector lane must be an integer in range [0, 1].
+
+// Invalid rotations
+  fcmla v0.4s, v1.4s, v2.s[0], #1
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270.
+  fcmla v0.4s, v1.4s, v2.s[0], #360
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270.
+  fcmla v0.4s, v1.4s, v2.s[0], #-90
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270.
--- a/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
+++ b/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
@ -0,0 +1,101 @@
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,-fullfp16 --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
+# RUN: FileCheck %s < %t --check-prefix=NO-FP16
+# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,+fullfp16 --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-v8.3a,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NO-V83A
+
+###### FCMLA vector
+[0x20,0xc4,0x42,0x2e]
+# FP16: fcmla v0.4h, v1.4h, v2.4h, #0
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xc4,0x42,0x6e]
+# FP16: fcmla v0.8h, v1.8h, v2.8h, #0
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xc4,0x82,0x2e]
+# CHECK: fcmla v0.2s, v1.2s, v2.2s, #0
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xc4,0x82,0x6e]
+# CHECK: fcmla v0.4s, v1.4s, v2.4s, #0
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xc4,0xc2,0x6e]
+# CHECK: fcmla v0.2d, v1.2d, v2.2d, #0
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+
+
+[0x20,0xc4,0x82,0x2e]
+# CHECK: fcmla v0.2s, v1.2s, v2.2s, #0
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xcc,0x82,0x2e]
+# CHECK: fcmla v0.2s, v1.2s, v2.2s, #90
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xd4,0x82,0x2e]
+# CHECK: fcmla v0.2s, v1.2s, v2.2s, #180
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xdc,0x82,0x2e]
+# CHECK: fcmla v0.2s, v1.2s, v2.2s, #270
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+
+
+###### FCADD vector
+[0x20,0xe4,0x42,0x2e]
+# FP16: fcadd v0.4h, v1.4h, v2.4h, #90
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xe4,0x42,0x6e]
+# FP16: fcadd v0.8h, v1.8h, v2.8h, #90
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xe4,0x82,0x2e]
+# CHECK: fcadd v0.2s, v1.2s, v2.2s, #90
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xe4,0x82,0x6e]
+# CHECK: fcadd v0.4s, v1.4s, v2.4s, #90
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xe4,0xc2,0x6e]
+# CHECK: fcadd v0.2d, v1.2d, v2.2d, #90
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+
+
+[0x20,0xe4,0x82,0x2e]
+# CHECK: fcadd v0.2s, v1.2s, v2.2s, #90
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0xf4,0x82,0x2e]
+# CHECK: fcadd v0.2s, v1.2s, v2.2s, #270
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+
+[0x20,0x10,0x42,0x2f]
+# FP16: fcmla v0.4h, v1.4h, v2.h[0], #0
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0x10,0x42,0x6f]
+# FP16: fcmla v0.8h, v1.8h, v2.h[0], #0
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0x10,0x82,0x6f]
+# CHECK: fcmla v0.4s, v1.4s, v2.s[0], #0
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+
+
+[0x20,0x30,0x82,0x6f]
+# CHECK: fcmla v0.4s, v1.4s, v2.s[0], #90
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0x50,0x82,0x6f]
+# CHECK: fcmla v0.4s, v1.4s, v2.s[0], #180
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0x70,0x82,0x6f]
+# CHECK: fcmla v0.4s, v1.4s, v2.s[0], #270
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+
+
+[0x20,0x10,0x62,0x2f]
+# FP16: fcmla v0.4h, v1.4h, v2.h[1], #0
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0x18,0x62,0x6f]
+# FP16: fcmla v0.8h, v1.8h, v2.h[3], #0
+# NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding
+# NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: warning: invalid instruction encoding
+[0x20,0x18,0x82,0x6f]
+# CHECK: fcmla v0.4s, v1.4s, v2.s[1], #0
+# NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: warning: invalid instruction encoding