[AArch64][GlobalISel] Refactor + improve CMN, ADDS, and ADD emit functions

These functions were extremely similar:

- `emitADD`
- `emitADDS`
- `emitCMN`

Refactor them a little, introducing a more generic `emitInstr` function to
do most of the work.

Also add support for the immediate + shifted register addressing modes in each
of them.

Update select-uaddo.mir to show that selecting ADDS now supports folding
immediates + shifts. (I don't think this can impact CMN, because the CMN checks
require a G_SUB with a non-constant on the RHS.)

This is around a 0.02% code size improvement on CTMark at -O3.
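
For reference, the shared selection logic introduced here reduces to a table
lookup, roughly as in the standalone C++ sketch below (a simplified
approximation of the emitBinOp code in the diff; the Opcode enum and pick
helper are invented for illustration):

  #include <array>
  #include <cstdio>

  enum Opcode { ADDSXri, ADDSWri, ADDSXrs, ADDSWrs, ADDSXrr, ADDSWrr };

  // Row 0 = immediate form, row 1 = shifted register, row 2 = plain register;
  // column 0 = 64-bit opcodes, column 1 = 32-bit opcodes.
  static Opcode pick(const std::array<std::array<Opcode, 2>, 3> &Table,
                     unsigned AddrMode, bool Is32Bit) {
    return Table[AddrMode][Is32Bit];
  }

  int main() {
    const std::array<std::array<Opcode, 2>, 3> AddsTable{
        {{ADDSXri, ADDSWri}, {ADDSXrs, ADDSWrs}, {ADDSXrr, ADDSWrr}}};
    // A 32-bit G_UADDO whose RHS is a foldable constant selects the ri form.
    std::printf("%d\n", pick(AddsTable, /*AddrMode=*/0, /*Is32Bit=*/true));
    return 0;
  }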

Differential Revision: https://reviews.llvm.org/D87529
commit 5ffe1901d5
parent d8616d8c76
Author: Jessica Paquette
Date:   2020-09-10 13:34:15 -07:00

2 changed files with 155 additions and 42 deletions

AArch64InstructionSelector.cpp

@@ -171,8 +171,57 @@ private:
   emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                      MachineOperand &Predicate,
                      MachineIRBuilder &MIRBuilder) const;
-  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
+  MachineInstr *emitInstr(unsigned Opcode,
+                          std::initializer_list<llvm::DstOp> DstOps,
+                          std::initializer_list<llvm::SrcOp> SrcOps,
+                          MachineIRBuilder &MIRBuilder,
+                          const ComplexRendererFns &RenderFns = None) const;
+  /// Helper function to emit a binary operation such as an ADD, ADDS, etc.
+  ///
+  /// This is intended for instructions with the following opcode variants:
+  ///
+  ///  - Xri, Wri (arithmetic immediate form)
+  ///  - Xrs, Wrs (shifted register form)
+  ///  - Xrr, Wrr (register form)
+  ///
+  /// For example, for ADD, we have ADDXri, ADDWri, ADDXrs, etc.
+  ///
+  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
+  /// in a specific order.
+  ///
+  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
+  ///
+  /// \code
+  ///   const std::array<std::array<unsigned, 2>, 3> Table {
+  ///    {{AArch64::ADDXri, AArch64::ADDWri},
+  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
+  ///     {AArch64::ADDXrr, AArch64::ADDWrr}}};
+  /// \endcode
+  ///
+  /// Each row in the table corresponds to a different addressing mode. Each
+  /// column corresponds to a different register size.
+  ///
+  /// \attention Rows must be structured as follows:
+  ///   - Row 0: The ri opcode variants
+  ///   - Row 1: The rs opcode variants
+  ///   - Row 2: The rr opcode variants
+  ///
+  /// \attention Columns must be structured as follows:
+  ///   - Column 0: The 64-bit opcode variants
+  ///   - Column 1: The 32-bit opcode variants
+  ///
+  /// \p Dst is the destination register of the binop to emit.
+  /// \p LHS is the left-hand operand of the binop to emit.
+  /// \p RHS is the right-hand operand of the binop to emit.
+  MachineInstr *emitBinOp(
+      const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
+      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+      MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
+                        MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitTST(const Register &LHS, const Register &RHS,
@@ -2462,11 +2511,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }

     // Add and set the set condition flag.
-    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
     MachineIRBuilder MIRBuilder(I);
-    auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
-                                        {I.getOperand(2), I.getOperand(3)});
-    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
+    emitADDS(I.getOperand(0).getReg(), I.getOperand(2), I.getOperand(3),
+             MIRBuilder);

     // Now, put the overflow result in the register given by the first operand
     // to the G_UADDO. CSINC increments the result when the predicate is false,
@@ -3749,55 +3796,70 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
   return std::make_pair(Opc, SubregIdx);
 }

+MachineInstr *AArch64InstructionSelector::emitInstr(
+    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
+    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
+    const ComplexRendererFns &RenderFns) const {
+  assert(Opcode && "Expected an opcode?");
+  assert(!isPreISelGenericOpcode(Opcode) &&
+         "Function should only be used to produce selected instructions!");
+  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
+  if (RenderFns)
+    for (auto &Fn : *RenderFns)
+      Fn(MI);
+  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+  return &*MI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitBinOp(
+    const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
+    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+    MachineIRBuilder &MIRBuilder) const {
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+  auto Ty = MRI.getType(LHS.getReg());
+  assert(Ty.isScalar() && "Expected a scalar?");
+  unsigned Size = Ty.getSizeInBits();
+  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
+  bool Is32Bit = Size == 32;
+  if (auto Fns = selectArithImmed(RHS))
+    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
+                     MIRBuilder, Fns);
+  if (auto Fns = selectShiftedRegister(RHS))
+    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
+                     MIRBuilder, Fns);
+  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
+                   MIRBuilder);
+}
+
 MachineInstr *
 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
-  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
-  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
-                                       {AArch64::ADDWrr, AArch64::ADDWri}};
-  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
-  auto ImmFns = selectArithImmed(RHS);
-  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
-  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
+  const std::array<std::array<unsigned, 2>, 3> OpcTable{
+      {{AArch64::ADDXri, AArch64::ADDWri},
+       {AArch64::ADDXrs, AArch64::ADDWrs},
+       {AArch64::ADDXrr, AArch64::ADDWrr}}};
+  return emitBinOp(OpcTable, DefReg, LHS, RHS, MIRBuilder);
+}

-  // If we matched a valid constant immediate, add those operands.
-  if (ImmFns) {
-    for (auto &RenderFn : *ImmFns)
-      RenderFn(AddMI);
-  } else {
-    AddMI.addUse(RHS.getReg());
-  }
-  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
-  return &*AddMI;
+MachineInstr *
+AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
+                                     MachineOperand &RHS,
+                                     MachineIRBuilder &MIRBuilder) const {
+  const std::array<std::array<unsigned, 2>, 3> OpcTable{
+      {{AArch64::ADDSXri, AArch64::ADDSWri},
+       {AArch64::ADDSXrs, AArch64::ADDSWrs},
+       {AArch64::ADDSXrr, AArch64::ADDSWrr}}};
+  return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }

 MachineInstr *
 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
-  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
-                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
-  auto ImmFns = selectArithImmed(RHS);
-  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
-  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
-  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
-
-  // If we matched a valid constant immediate, add those operands.
-  if (ImmFns) {
-    for (auto &RenderFn : *ImmFns)
-      RenderFn(CmpMI);
-  } else {
-    CmpMI.addUse(RHS.getReg());
-  }
-  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
-  return &*CmpMI;
+  return emitADDS(Is32Bit ? AArch64::WZR : AArch64::XZR, LHS, RHS, MIRBuilder);
 }

 MachineInstr *
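
The emitInstr helper above threads optional renderer callbacks through
instruction creation. Stripped of the LLVM types, the pattern looks roughly
like the following standalone sketch (InstBuilder and RendererFns are invented
stand-ins for MachineInstrBuilder and ComplexRendererFns):

  #include <functional>
  #include <iostream>
  #include <optional>
  #include <vector>

  struct InstBuilder {
    std::vector<int> Operands;
    void addOperand(int Op) { Operands.push_back(Op); }
  };

  using RendererFns =
      std::optional<std::vector<std::function<void(InstBuilder &)>>>;

  // Build the instruction from its fixed operands, then let any matched
  // renderer callbacks append the operands encoding the addressing mode.
  InstBuilder emitInstr(int Opcode, const std::vector<int> &SrcOps,
                        const RendererFns &Fns = std::nullopt) {
    InstBuilder MI;
    MI.addOperand(Opcode);
    for (int Op : SrcOps)
      MI.addOperand(Op);
    if (Fns)
      for (const auto &Fn : *Fns)
        Fn(MI); // e.g. append the immediate and its shift amount
    return MI;
  }

  int main() {
    // Immediate form: one renderer appends the encoded immediate.
    std::vector<std::function<void(InstBuilder &)>> ImmFns;
    ImmFns.push_back([](InstBuilder &MI) { MI.addOperand(16); });
    InstBuilder MI = emitInstr(/*Opcode=*/0, {/*LHS=*/1}, ImmFns);
    std::cout << MI.Operands.size() << " operands\n"; // prints "3 operands"
  }

The callback shape lets a complex pattern matcher both report whether it
matched and, when it did, supply the logic that emits the matched operands.
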

select-uaddo.mir

@@ -60,3 +60,54 @@ body: |
     RET_ReallyLR implicit $w0

 ...
+---
+name: uaddo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    ; Check that we get ADDSWri when we can fold in a constant.
+    ;
+    ; CHECK-LABEL: name: uaddo_s32_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $w0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: uaddo_s32_shifted
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    ; Check that we get ADDSWrs when we can fold in a shift.
+    ;
+    ; CHECK-LABEL: name: uaddo_s32_shifted
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy1:gpr32 = COPY $w0
+    ; CHECK: %copy2:gpr32 = COPY $w1
+    ; CHECK: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $w0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy1:gpr(s32) = COPY $w0
+    %copy2:gpr(s32) = COPY $w1
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %shift:gpr(s32) = G_SHL %copy2(s32), %constant(s32)
+    %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy1, %shift
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
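
As a semantic cross-check (plain C++, not part of the patch): the ADDSWri plus
CSINCWr pair checked above computes an unsigned add and then materializes the
carry flag as 0 or 1, which __builtin_add_overflow expresses directly:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Copy = 0xFFFFFFF0u; // any value within 16 of UINT32_MAX overflows
    uint32_t Add;
    // ADDSWri %copy, 16 sets the carry flag on unsigned overflow; CSINCWr with
    // predicate 3 ("lo", carry clear) yields 1 exactly when the flag is set.
    bool Overflow = __builtin_add_overflow(Copy, 16u, &Add);
    std::printf("add = %u, overflow = %d\n", Add, Overflow);
    return 0;
  }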