1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[VE] Correct LVLGen (LVL instruction insert pass)

SX Aurora VE uses an intermediate representation similar to VP as its MIR.
VE itself uses an individual VL register as its vector length register at
the hardware level.  So, LLVM needs to insert a load VL (LVL) instruction just
before vector instructions if the value of VL is changed.  This LVLGen pass
generates LVL instructions for that purpose.  Previously, a bug was pointed
out in D91416.  This patch corrects this bug and adds a regression test.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D92716
This commit is contained in:
Kazushi (Jam) Marukawa 2020-12-05 16:53:39 +09:00
parent 57cdf9b21a
commit c3b7c2e861
2 changed files with 55 additions and 11 deletions

View File

@ -68,6 +68,12 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
MachineBasicBlock::iterator MI = I;
// Check whether MI uses a vector length operand. If so, we prepare for VL
// register. We would like to reuse VL register as much as possible. We
// also would like to keep the number of LEA instructions as small as
// possible. Therefore, we use a regular scalar register to hold immediate
// values to load VL register. And try to reuse identical scalar registers
// to avoid new LVLr instructions as much as possible.
unsigned Reg = getVL(*MI);
if (Reg != VE::NoRegister) {
LLVM_DEBUG(dbgs() << "Vector instruction found: ");
@ -78,6 +84,8 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
<< ". ");
if (!HasRegForVL || RegForVL != Reg) {
// Use VL, but a different value in a different scalar register.
// So, generate new LVL instruction just before the current instruction.
LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
<< RegName(Reg) << ".\n");
BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
@ -87,18 +95,15 @@ bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
} else {
LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
}
} else if (HasRegForVL) {
// Old VL is overwritten, so disable HasRegForVL.
if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
LLVM_DEBUG(MI->dump());
HasRegForVL = false;
}
}
// Check the update of a given scalar register holding an immediate value
// for VL register. Also, a call doesn't preserve VL register.
if (HasRegForVL) {
// The latest VL is killed, so disable HasRegForVL.
if (MI->killsRegister(RegForVL, TRI)) {
LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
if (MI->definesRegister(RegForVL, TRI) ||
MI->modifiesRegister(RegForVL, TRI) ||
MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
// The latest VL is needed to be updated, so disable HasRegForVL.
LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: ");
LLVM_DEBUG(MI->dump());
HasRegForVL = false;
}

View File

@ -42,7 +42,6 @@ define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
; Check that no redundant 'lvl' is inserted when vector length does not change
; in a basic block.
; Function Attrs: nounwind
define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-LABEL: stable_vl:
@ -64,3 +63,43 @@ define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
ret void
}
;;; Check the case we have a call in the middle of vector instructions.
;;; All vector intrinsics below use the same %evl, but the call to @fun sits
;;; between the first vld/vst pair and the remaining ones.  The expected
;;; output therefore has one 'lvl' before the first vector instruction and a
;;; second 'lvl' right after the call (bsic), and no further 'lvl' for the
;;; last two vld/vst pairs that follow without an intervening call.
; Function Attrs: nounwind
define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-LABEL: call_invl:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill
; CHECK-NEXT: st %s19, 296(, %s11) # 8-byte Folded Spill
; CHECK-NEXT: st %s20, 304(, %s11) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s1
; CHECK-NEXT: and %s20, %s0, (32)0
; CHECK-NEXT: lvl %s20
; CHECK-NEXT: vld %v0, 8, %s1
; CHECK-NEXT: or %s19, 0, %s2
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: lea %s0, fun@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lvl %s20
; CHECK-NEXT: vld %v0, 16, %s18
; CHECK-NEXT: vst %v0, 16, %s19
; CHECK-NEXT: vld %v0, 8, %s18
; CHECK-NEXT: vst %v0, 16, %s19
; CHECK-NEXT: ld %s20, 304(, %s11) # 8-byte Folded Reload
; CHECK-NEXT: ld %s19, 296(, %s11) # 8-byte Folded Reload
; CHECK-NEXT: ld %s18, 288(, %s11) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
call void @fun()
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
ret void
}
declare void @fun()