
[RISCV] Teach vsetvli insertion to remember when predecessors have same AVL and SEW/LMUL ratio if their VTYPEs otherwise mismatch.

Previously we went directly to the unknown state on a VTYPE mismatch.
If we instead remember the partial match, we can still use the
X0, X0 form of vsetvli in successors when the AVL and the required
SEW/LMUL ratio match.
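
Why the SEW/LMUL ratio is the right thing to remember (arithmetic from
the RISC-V V spec; the concrete VTYPEs are the ones in the new tests):
the X0, X0 form of vsetvli rewrites VTYPE while preserving the current
VL, which is only legal when VLMAX is unchanged. Since
VLMAX = VLEN * LMUL / SEW, two configurations share a VLMAX exactly
when their SEW/LMUL ratios agree:

  e32, m1  : VLMAX = VLEN * 1     / 32 = VLEN / 32
  e16, mf2 : VLMAX = VLEN * (1/2) / 16 = VLEN / 32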

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D104069
Craig Topper 2021-06-18 12:10:17 -07:00
parent f2d99918a7
commit a2969894c5
3 changed files with 134 additions and 8 deletions


@@ -58,10 +58,12 @@ class VSETVLIInfo {
   uint8_t TailAgnostic : 1;
   uint8_t MaskAgnostic : 1;
   uint8_t MaskRegOp : 1;
+  uint8_t SEWLMULRatioOnly : 1;
 
 public:
   VSETVLIInfo()
-      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false) {}
+      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
+        SEWLMULRatioOnly(false) {}
 
   static VSETVLIInfo getUnknown() {
     VSETVLIInfo Info;
@@ -127,16 +129,20 @@ public:
   }
 
   unsigned encodeVTYPE() const {
-    assert(isValid() && !isUnknown() &&
+    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
     return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
   }
 
+  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
+
   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
     assert(isValid() && Other.isValid() &&
            "Can't compare invalid VSETVLIInfos");
     assert(!isUnknown() && !Other.isUnknown() &&
            "Can't compare VTYPE in unknown state");
+    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
+           "Can't compare when only LMUL/SEW ratio is valid.");
     return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
            std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                     Other.MaskAgnostic);
@@ -172,10 +178,16 @@ public:
   bool isCompatible(const VSETVLIInfo &InstrInfo) const {
     assert(isValid() && InstrInfo.isValid() &&
            "Can't compare invalid VSETVLIInfos");
+    assert(!InstrInfo.SEWLMULRatioOnly &&
+           "Expected a valid VTYPE for instruction!");
     // Nothing is compatible with Unknown.
     if (isUnknown() || InstrInfo.isUnknown())
       return false;
 
+    // If only our VLMAX ratio is valid, then this isn't compatible.
+    if (SEWLMULRatioOnly)
+      return false;
+
     // If the instruction doesn't need an AVLReg and the SEW matches, consider
     // it compatible.
     if (InstrInfo.hasAVLReg() && InstrInfo.AVLReg == RISCV::NoRegister) {
@@ -209,8 +221,19 @@ public:
     if (Other.isUnknown())
       return isUnknown();
 
-    // Otherwise compare the VTYPE and AVL.
-    return hasSameVTYPE(Other) && hasSameAVL(Other);
+    if (!hasSameAVL(Other))
+      return false;
+
+    // If only the VLMAX is valid, check that it is the same.
+    if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
+      return hasSameVLMAX(Other);
+
+    // If the full VTYPE is valid, check that it is the same.
+    if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
+      return hasSameVTYPE(Other);
+
+    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
+    return false;
   }
 
   // Calculate the VSETVLIInfo visible to a block assuming this and Other are
@@ -224,10 +247,23 @@ public:
     if (!isValid())
       return Other;
 
+    // If either is unknown, the result is unknown.
+    if (isUnknown() || Other.isUnknown())
+      return VSETVLIInfo::getUnknown();
+
+    // If we have an exact match, return this.
     if (*this == Other)
       return *this;
 
-    // If the configurations don't match, assume unknown.
+    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
+    // return an SEW/LMUL ratio only value.
+    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
+      VSETVLIInfo MergeInfo = *this;
+      MergeInfo.SEWLMULRatioOnly = true;
+      return MergeInfo;
+    }
+
+    // Otherwise the result is unknown.
     return VSETVLIInfo::getUnknown();
   }
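
The merge above relies on hasSameVLMAX(), whose body is outside this
diff's context. A minimal standalone sketch of such a ratio comparison
(helper names and the fixed-point scaling are assumptions, not code
quoted from the patch):

  #include <cassert>

  // Sketch: compare SEW/LMUL ratios without knowing VLEN. Scaling LMUL
  // to eighths keeps fractional LMULs (mf2/mf4/mf8) integral.
  static unsigned sewLMulRatio(unsigned SEW, unsigned LMul, bool Fractional) {
    unsigned LMulEighths = Fractional ? (8 / LMul) : (LMul * 8);
    return (SEW * 8) / LMulEighths;
  }

  // Equal ratios imply equal VLMAX = VLEN * LMUL / SEW for every VLEN.
  static bool sameVLMAX(unsigned SEW1, unsigned LMul1, bool Frac1,
                        unsigned SEW2, unsigned LMul2, bool Frac2) {
    return sewLMulRatio(SEW1, LMul1, Frac1) == sewLMulRatio(SEW2, LMul2, Frac2);
  }

  int main() {
    assert(sameVLMAX(32, 1, false, 16, 2, true));   // e32,m1 vs e16,mf2
    assert(!sameVLMAX(32, 1, false, 16, 1, false)); // e32,m1 vs e16,m1
  }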
@@ -444,7 +480,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
   // and the last VL/VTYPE we observed is the same, we don't need a
   // VSETVLI here.
   if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
-      Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) {
+      Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
+      Require.hasSameVTYPE(CurInfo)) {
     if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
       if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
           DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
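
Taken together, intersect() acts as the meet function the pass applies
across predecessor exit states. A toy model of the resulting
three-level lattice (all names here are illustrative, not from the
pass itself):

  #include <cassert>

  // Toy lattice: full VTYPE known > only SEW/LMUL ratio known > unknown.
  enum class State { FullVType, RatioOnly, Unknown };

  // Meet of two predecessor states that already agree on AVL.
  static State meet(State A, State B, bool SameVType, bool SameRatio) {
    if (A == State::Unknown || B == State::Unknown)
      return State::Unknown;
    if (A == State::FullVType && B == State::FullVType && SameVType)
      return State::FullVType;
    // Before this patch the fallback was Unknown; the patch remembers a
    // matching SEW/LMUL ratio as a partial state instead.
    return SameRatio ? State::RatioOnly : State::Unknown;
  }

  int main() {
    // e32,m1 vs e16,mf2: different VTYPE, same ratio -> RatioOnly, which
    // still allows the X0, X0 form of vsetvli in the successor.
    assert(meet(State::FullVType, State::FullVType,
                /*SameVType=*/false, /*SameRatio=*/true) == State::RatioOnly);
  }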


@@ -584,7 +584,7 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
 ; RV32-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
 ; RV32-NEXT:    vslideup.vi v25, v26, 1
 ; RV32-NEXT:  .LBB8_4: # %else2
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32-NEXT:    vse32.v v25, (a1)
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
@@ -644,7 +644,7 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
 ; RV64-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
 ; RV64-NEXT:    vslideup.vi v25, v26, 1
 ; RV64-NEXT:  .LBB8_4: # %else2
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64-NEXT:    vse32.v v25, (a1)
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret


@@ -496,3 +496,92 @@ declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
 declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* nocapture, i64)
 declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64)
 declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)
+
+; We need a vsetvli in the last block because the predecessors have different
+; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
+; we don't need to read AVL and can keep VL unchanged.
+define <vscale x 2 x i32> @test_vsetvli_x0_x0(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
+; CHECK-LABEL: test_vsetvli_x0_x0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    andi a0, a3, 1
+; CHECK-NEXT:    beqz a0, .LBB9_2
+; CHECK-NEXT:  # %bb.1: # %if
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v26, (a1)
+; CHECK-NEXT:    vwadd.vx v8, v26, zero
+; CHECK-NEXT:  .LBB9_2: # %if.end
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v25, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
+  br i1 %cond, label %if, label %if.end
+
+if:
+  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
+  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16> %b, i16 0, i64 %vl)
+  br label %if.end
+
+if.end:
+  %d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
+  %e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
+  ret <vscale x 2 x i32> %e
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>*, i64)
+declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>*, i64)
+declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16>, i16, i64)
+declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64)
+
+; We can use X0, X0 vsetvli in if2 and if2.end. The merge point at if.end will
+; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
+; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
+; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
+; a predecessor we know the vtype for.
+define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i16>* %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
+; CHECK-LABEL: test_vsetvli_x0_x0_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    andi a0, a4, 1
+; CHECK-NEXT:    beqz a0, .LBB10_2
+; CHECK-NEXT:  # %bb.1: # %if
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v26, (a1)
+; CHECK-NEXT:    vwadd.wv v25, v25, v26
+; CHECK-NEXT:  .LBB10_2: # %if.end
+; CHECK-NEXT:    andi a0, a5, 1
+; CHECK-NEXT:    beqz a0, .LBB10_4
+; CHECK-NEXT:  # %bb.3: # %if2
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v26, (a2)
+; CHECK-NEXT:    vwadd.wv v25, v25, v26
+; CHECK-NEXT:  .LBB10_4: # %if2.end
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v25, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
+  br i1 %cond, label %if, label %if.end
+
+if:
+  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
+  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
+  br label %if.end
+
+if.end:
+  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
+  br i1 %cond2, label %if2, label %if2.end
+
+if2:
+  %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %z, i64 %vl)
+  %f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
+  br label %if2.end
+
+if2.end:
+  %g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
+  %h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
+  ret <vscale x 2 x i32> %h
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i16>, i64)