[VE] Correct LVLGen (LVL instruction insert pass)

SX Aurora VE uses an intermediate representation similar to VP as its MIR. VE itself uses an individual VL register as its own vector length register at the hardware level. So, LLVM needs to insert a load VL (LVL) instruction just before vector instructions if the value of VL is changed. This LVLGen pass generates LVL instructions for that purpose. Previously, a bug was pointed out in D91416. This patch corrects this bug and adds a regression test. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D92716
2024-11-22 10:42:39 +01:00 · 2020-12-05 16:53:39 +09:00 · 2020-12-05 16:53:39 +09:00 · c3b7c2e861
commit c3b7c2e861
parent 57cdf9b21a
2 changed files with 55 additions and 11 deletions
--- a/lib/Target/VE/LVLGen.cpp
+++ b/lib/Target/VE/LVLGen.cpp
@ -68,6 +68,12 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
    MachineBasicBlock::iterator MI = I;

+    // Check whether MI uses a vector length operand.  If so, we prepare the VL
+    // register.  We would like to reuse the VL register as much as possible.
+    // We would also like to keep the number of LEA instructions as small as
+    // possible.  Therefore, we use a regular scalar register to hold immediate
+    // values to load into the VL register, and try to reuse identical scalar
+    // registers to avoid new LVLr instructions as much as possible.
    unsigned Reg = getVL(*MI);
    if (Reg != VE::NoRegister) {
      LLVM_DEBUG(dbgs() << "Vector instruction found: ");
@ -78,6 +84,8 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
                        << ". ");

      if (!HasRegForVL || RegForVL != Reg) {
+        // Use VL, but a different value in a different scalar register.
+        // So, generate new LVL instruction just before the current instruction.
        LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
                          << RegName(Reg) << ".\n");
        BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
@ -87,18 +95,15 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
      } else {
        LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
      }
-    } else if (HasRegForVL) {
-      // Old VL is overwritten, so disable HasRegForVL.
-      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
-        LLVM_DEBUG(MI->dump());
-        HasRegForVL = false;
-      }
    }
+    // Check for an update of the scalar register holding the immediate value
+    // for the VL register.  Also, a call doesn't preserve the VL register.
    if (HasRegForVL) {
-      // The latest VL is killed, so disable HasRegForVL.
-      if (MI->killsRegister(RegForVL, TRI)) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
+      if (MI->definesRegister(RegForVL, TRI) ||
+          MI->modifiesRegister(RegForVL, TRI) ||
+          MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+        // The latest VL needs to be updated, so disable HasRegForVL.
+        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: ");
        LLVM_DEBUG(MI->dump());
        HasRegForVL = false;
      }
--- a/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
+++ b/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@ -42,7 +42,6 @@ define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
 ; Check that no redundant 'lvl' is inserted when vector length does not change
 ; in a basic block.

-
 ; Function Attrs: nounwind
 define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
 ; CHECK-LABEL: stable_vl:
@ -64,3 +63,43 @@ define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
  ret void
 }
+
+;;; Check the case where we have a call in the middle of vector instructions.
+
+; Function Attrs: nounwind
+define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
+; CHECK-LABEL: call_invl:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    st %s18, 288(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    st %s19, 296(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    st %s20, 304(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT:    or %s18, 0, %s1
+; CHECK-NEXT:    and %s20, %s0, (32)0
+; CHECK-NEXT:    lvl %s20
+; CHECK-NEXT:    vld %v0, 8, %s1
+; CHECK-NEXT:    or %s19, 0, %s2
+; CHECK-NEXT:    vst %v0, 16, %s2
+; CHECK-NEXT:    lea %s0, fun@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, fun@hi(, %s0)
+; CHECK-NEXT:    bsic %s10, (, %s12)
+; CHECK-NEXT:    lvl %s20
+; CHECK-NEXT:    vld %v0, 16, %s18
+; CHECK-NEXT:    vst %v0, 16, %s19
+; CHECK-NEXT:    vld %v0, 8, %s18
+; CHECK-NEXT:    vst %v0, 16, %s19
+; CHECK-NEXT:    ld %s20, 304(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    ld %s19, 296(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    ld %s18, 288(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT:    or %s11, 0, %s9
+  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+  call void @fun()
+  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
+  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+  ret void
+}
+
+declare void @fun()