mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[ARM] Attempt to make Tail predication / RDA more resilient to empty blocks
There are a number of places in RDA where we assume the block will not be empty. This isn't necessarily true for tail-predicated loops where we have removed instructions. This attempts to make the pass more resilient to empty blocks by not converting iterators into machine instruction pointers where they would be invalid. The test contains a case that was previously failing, but has recently been hidden on trunk. It contains an empty block to begin with to show a similar error. Differential Revision: https://reviews.llvm.org/D88926
This commit is contained in:
parent
0a8029e199
commit
995d885e43
@ -355,8 +355,10 @@ ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
|
|||||||
Uses.insert(&MI);
|
Uses.insert(&MI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MachineInstr *Last = &*MBB->getLastNonDebugInstr();
|
auto Last = MBB->getLastNonDebugInstr();
|
||||||
return isReachingDefLiveOut(Last, PhysReg);
|
if (Last == MBB->end())
|
||||||
|
return true;
|
||||||
|
return isReachingDefLiveOut(&*Last, PhysReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -481,8 +483,9 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const {
|
|||||||
bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
|
bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
|
||||||
int PhysReg) const {
|
int PhysReg) const {
|
||||||
MachineBasicBlock *MBB = MI->getParent();
|
MachineBasicBlock *MBB = MI->getParent();
|
||||||
MachineInstr *Last = &*MBB->getLastNonDebugInstr();
|
auto Last = MBB->getLastNonDebugInstr();
|
||||||
if (getReachingDef(MI, PhysReg) != getReachingDef(Last, PhysReg))
|
if (Last != MBB->end() &&
|
||||||
|
getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
|
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
|
||||||
@ -499,9 +502,9 @@ ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const {
|
|||||||
if (!LiveRegs.contains(PhysReg))
|
if (!LiveRegs.contains(PhysReg))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
MachineInstr *Last = &*MBB->getLastNonDebugInstr();
|
auto Last = MBB->getLastNonDebugInstr();
|
||||||
int Def = getReachingDef(MI, PhysReg);
|
int Def = getReachingDef(MI, PhysReg);
|
||||||
if (getReachingDef(Last, PhysReg) != Def)
|
if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Finally check that the last instruction doesn't redefine the register.
|
// Finally check that the last instruction doesn't redefine the register.
|
||||||
@ -519,11 +522,14 @@ MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
|
|||||||
if (!LiveRegs.contains(PhysReg))
|
if (!LiveRegs.contains(PhysReg))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
MachineInstr *Last = &*MBB->getLastNonDebugInstr();
|
auto Last = MBB->getLastNonDebugInstr();
|
||||||
int Def = getReachingDef(Last, PhysReg);
|
if (Last == MBB->end())
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
int Def = getReachingDef(&*Last, PhysReg);
|
||||||
for (auto &MO : Last->operands())
|
for (auto &MO : Last->operands())
|
||||||
if (isValidRegDefOf(MO, PhysReg))
|
if (isValidRegDefOf(MO, PhysReg))
|
||||||
return Last;
|
return &*Last;
|
||||||
|
|
||||||
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
|
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
|
||||||
}
|
}
|
||||||
|
@ -675,6 +675,8 @@ bool LowOverheadLoop::ValidateTailPredicate() {
|
|||||||
// before entering the loop.
|
// before entering the loop.
|
||||||
auto CannotProvideElements = [this](MachineBasicBlock *MBB,
|
auto CannotProvideElements = [this](MachineBasicBlock *MBB,
|
||||||
Register NumElements) {
|
Register NumElements) {
|
||||||
|
if (MBB->empty())
|
||||||
|
return false;
|
||||||
// NumElements is redefined in this block.
|
// NumElements is redefined in this block.
|
||||||
if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
|
if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
|
||||||
return true;
|
return true;
|
||||||
|
634
test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
Normal file
634
test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
Normal file
@ -0,0 +1,634 @@
|
|||||||
|
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
|
||||||
|
|
||||||
|
# This test was originally hitting problems with empty blocks. That went away
|
||||||
|
# but the underlying problem (empty blocks causing iterator issues) still remains.
|
||||||
|
# The test adds an extra empty block to one of the loops to test this.
|
||||||
|
|
||||||
|
# CHECK: LETP
|
||||||
|
|
||||||
|
--- |
|
||||||
|
%struct.DCT_InstanceTypeDef = type { float*, i32, i32 }
|
||||||
|
|
||||||
|
; Function Attrs: nofree nounwind
|
||||||
|
define hidden arm_aapcs_vfpcc void @test(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* %pIn, float* nocapture %pOut) {
|
||||||
|
entry:
|
||||||
|
%NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 2
|
||||||
|
%0 = load i32, i32* %NumInputs, align 4
|
||||||
|
%NumFilters = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 1
|
||||||
|
%1 = load i32, i32* %NumFilters, align 4
|
||||||
|
%pDCTCoefs34 = bitcast %struct.DCT_InstanceTypeDef* %S to float**
|
||||||
|
%2 = load float*, float** %pDCTCoefs34, align 4
|
||||||
|
%3 = add i32 %0, 3
|
||||||
|
%4 = icmp slt i32 %0, 4
|
||||||
|
%smin36 = select i1 %4, i32 %0, i32 4
|
||||||
|
%5 = sub i32 %3, %smin36
|
||||||
|
%6 = lshr i32 %5, 2
|
||||||
|
%7 = add nuw nsw i32 %6, 1
|
||||||
|
call void @llvm.set.loop.iterations.i32(i32 %7)
|
||||||
|
br label %do.body
|
||||||
|
|
||||||
|
do.body: ; preds = %do.body, %entry
|
||||||
|
%count.0 = phi i32 [ %0, %entry ], [ %12, %do.body ]
|
||||||
|
%pInT.0 = phi float* [ %pIn, %entry ], [ %add.ptr, %do.body ]
|
||||||
|
%sumVec.0 = phi <4 x float> [ zeroinitializer, %entry ], [ %11, %do.body ]
|
||||||
|
%8 = phi i32 [ %7, %entry ], [ %13, %do.body ]
|
||||||
|
%pInT.033 = bitcast float* %pInT.0 to <4 x float>*
|
||||||
|
%9 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.0)
|
||||||
|
%10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer)
|
||||||
|
%11 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0, <4 x float> %10, <4 x i1> %9, <4 x float> undef)
|
||||||
|
%add.ptr = getelementptr inbounds float, float* %pInT.0, i32 4
|
||||||
|
%12 = add i32 %count.0, -4
|
||||||
|
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
|
||||||
|
%14 = icmp ne i32 %13, 0
|
||||||
|
br i1 %14, label %do.body, label %do.end
|
||||||
|
|
||||||
|
do.end: ; preds = %do.body
|
||||||
|
%15 = extractelement <4 x float> %11, i32 0
|
||||||
|
%16 = extractelement <4 x float> %11, i32 1
|
||||||
|
%add = fadd fast float %15, %16
|
||||||
|
%17 = extractelement <4 x float> %11, i32 2
|
||||||
|
%add1 = fadd fast float %add, %17
|
||||||
|
%18 = extractelement <4 x float> %11, i32 3
|
||||||
|
%add2 = fadd fast float %add1, %18
|
||||||
|
%19 = load float, float* %2, align 4
|
||||||
|
%mul = fmul fast float %19, %add2
|
||||||
|
store float %mul, float* %pOut, align 4
|
||||||
|
%sub4 = add i32 %1, -4
|
||||||
|
%cmp5201 = icmp ugt i32 %sub4, 1
|
||||||
|
br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader
|
||||||
|
|
||||||
|
for.body.lr.ph: ; preds = %do.end
|
||||||
|
%scevgep = getelementptr float, float* %pIn, i32 4
|
||||||
|
%20 = add i32 %0, 4
|
||||||
|
%scevgep5 = getelementptr float, float* %2, i32 %20
|
||||||
|
%21 = shl i32 %0, 4
|
||||||
|
%22 = shl i32 %0, 1
|
||||||
|
%23 = add i32 %22, 4
|
||||||
|
%scevgep12 = getelementptr float, float* %2, i32 %23
|
||||||
|
%24 = mul i32 %0, 3
|
||||||
|
%25 = add i32 %24, 4
|
||||||
|
%scevgep19 = getelementptr float, float* %2, i32 %25
|
||||||
|
%26 = shl i32 %0, 2
|
||||||
|
%27 = add i32 %26, 4
|
||||||
|
%scevgep26 = getelementptr float, float* %2, i32 %27
|
||||||
|
%28 = add i32 %0, -1
|
||||||
|
%29 = add i32 %0, -4
|
||||||
|
%30 = icmp slt i32 %29, 4
|
||||||
|
%smin35 = select i1 %30, i32 %29, i32 4
|
||||||
|
%31 = sub i32 %28, %smin35
|
||||||
|
%32 = lshr i32 %31, 2
|
||||||
|
%33 = add nuw nsw i32 %32, 1
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.cond54.preheader: ; preds = %do.end33, %do.end
|
||||||
|
%k.0.lcssa = phi i32 [ 1, %do.end ], [ %add53, %do.end33 ]
|
||||||
|
%cmp55199 = icmp ult i32 %k.0.lcssa, %1
|
||||||
|
br i1 %cmp55199, label %for.body56.preheader, label %for.end72
|
||||||
|
|
||||||
|
for.body56.preheader: ; preds = %for.cond54.preheader
|
||||||
|
%34 = add i32 %0, 3
|
||||||
|
%35 = icmp slt i32 %0, 4
|
||||||
|
%smin = select i1 %35, i32 %0, i32 4
|
||||||
|
%36 = sub i32 %34, %smin
|
||||||
|
%37 = lshr i32 %36, 2
|
||||||
|
%38 = add nuw nsw i32 %37, 1
|
||||||
|
br label %for.body56
|
||||||
|
|
||||||
|
for.body: ; preds = %do.end33, %for.body.lr.ph
|
||||||
|
%lsr.iv27 = phi float* [ %88, %do.end33 ], [ %scevgep26, %for.body.lr.ph ]
|
||||||
|
%lsr.iv20 = phi float* [ %87, %do.end33 ], [ %scevgep19, %for.body.lr.ph ]
|
||||||
|
%lsr.iv13 = phi float* [ %86, %do.end33 ], [ %scevgep12, %for.body.lr.ph ]
|
||||||
|
%lsr.iv6 = phi float* [ %85, %do.end33 ], [ %scevgep5, %for.body.lr.ph ]
|
||||||
|
%k.0202 = phi i32 [ 1, %for.body.lr.ph ], [ %add53, %do.end33 ]
|
||||||
|
%39 = bitcast float* %pIn to <4 x float>*
|
||||||
|
%mul7 = mul i32 %k.0202, %0
|
||||||
|
%arrayidx8 = getelementptr inbounds float, float* %2, i32 %mul7
|
||||||
|
%add9 = add nuw nsw i32 %k.0202, 1
|
||||||
|
%mul10 = mul i32 %add9, %0
|
||||||
|
%arrayidx11 = getelementptr inbounds float, float* %2, i32 %mul10
|
||||||
|
%add12 = add nuw nsw i32 %k.0202, 2
|
||||||
|
%mul13 = mul i32 %add12, %0
|
||||||
|
%arrayidx14 = getelementptr inbounds float, float* %2, i32 %mul13
|
||||||
|
%add15 = add i32 %k.0202, 3
|
||||||
|
%mul16 = mul i32 %add15, %0
|
||||||
|
%arrayidx17 = getelementptr inbounds float, float* %2, i32 %mul16
|
||||||
|
%40 = load <4 x float>, <4 x float>* %39, align 4
|
||||||
|
%41 = bitcast float* %arrayidx8 to <4 x float>*
|
||||||
|
%42 = load <4 x float>, <4 x float>* %41, align 4
|
||||||
|
%43 = fmul fast <4 x float> %42, %40
|
||||||
|
%44 = bitcast float* %arrayidx11 to <4 x float>*
|
||||||
|
%45 = load <4 x float>, <4 x float>* %44, align 4
|
||||||
|
%46 = fmul fast <4 x float> %45, %40
|
||||||
|
%47 = bitcast float* %arrayidx14 to <4 x float>*
|
||||||
|
%48 = load <4 x float>, <4 x float>* %47, align 4
|
||||||
|
%49 = fmul fast <4 x float> %48, %40
|
||||||
|
%50 = bitcast float* %arrayidx17 to <4 x float>*
|
||||||
|
%51 = load <4 x float>, <4 x float>* %50, align 4
|
||||||
|
%52 = fmul fast <4 x float> %51, %40
|
||||||
|
call void @llvm.set.loop.iterations.i32(i32 %33)
|
||||||
|
br label %do.body24
|
||||||
|
|
||||||
|
do.body24: ; preds = %do.body24, %for.body
|
||||||
|
%lsr.iv30 = phi float* [ %scevgep31, %do.body24 ], [ %lsr.iv27, %for.body ]
|
||||||
|
%lsr.iv23 = phi float* [ %scevgep24, %do.body24 ], [ %lsr.iv20, %for.body ]
|
||||||
|
%lsr.iv16 = phi float* [ %scevgep17, %do.body24 ], [ %lsr.iv13, %for.body ]
|
||||||
|
%lsr.iv9 = phi float* [ %scevgep10, %do.body24 ], [ %lsr.iv6, %for.body ]
|
||||||
|
%lsr.iv = phi float* [ %scevgep3, %do.body24 ], [ %scevgep, %for.body ]
|
||||||
|
%sumVec0.0 = phi <4 x float> [ %43, %for.body ], [ %56, %do.body24 ]
|
||||||
|
%sumVec1.0 = phi <4 x float> [ %46, %for.body ], [ %58, %do.body24 ]
|
||||||
|
%sumVec2.0 = phi <4 x float> [ %49, %for.body ], [ %60, %do.body24 ]
|
||||||
|
%sumVec3.0 = phi <4 x float> [ %52, %for.body ], [ %62, %do.body24 ]
|
||||||
|
%53 = phi i32 [ %33, %for.body ], [ %63, %do.body24 ]
|
||||||
|
%lsr.iv4 = bitcast float* %lsr.iv to <4 x float>*
|
||||||
|
%lsr.iv911 = bitcast float* %lsr.iv9 to <4 x float>*
|
||||||
|
%lsr.iv1618 = bitcast float* %lsr.iv16 to <4 x float>*
|
||||||
|
%lsr.iv2325 = bitcast float* %lsr.iv23 to <4 x float>*
|
||||||
|
%lsr.iv3032 = bitcast float* %lsr.iv30 to <4 x float>*
|
||||||
|
%54 = load <4 x float>, <4 x float>* %lsr.iv4, align 4
|
||||||
|
%55 = load <4 x float>, <4 x float>* %lsr.iv911, align 4
|
||||||
|
%56 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %55, <4 x float> %sumVec0.0)
|
||||||
|
%57 = load <4 x float>, <4 x float>* %lsr.iv1618, align 4
|
||||||
|
%58 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %57, <4 x float> %sumVec1.0)
|
||||||
|
%59 = load <4 x float>, <4 x float>* %lsr.iv2325, align 4
|
||||||
|
%60 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %59, <4 x float> %sumVec2.0)
|
||||||
|
%61 = load <4 x float>, <4 x float>* %lsr.iv3032, align 4
|
||||||
|
%62 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %61, <4 x float> %sumVec3.0)
|
||||||
|
%scevgep3 = getelementptr float, float* %lsr.iv, i32 4
|
||||||
|
%scevgep10 = getelementptr float, float* %lsr.iv9, i32 4
|
||||||
|
%scevgep17 = getelementptr float, float* %lsr.iv16, i32 4
|
||||||
|
%scevgep24 = getelementptr float, float* %lsr.iv23, i32 4
|
||||||
|
%scevgep31 = getelementptr float, float* %lsr.iv30, i32 4
|
||||||
|
%63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1)
|
||||||
|
%64 = icmp ne i32 %63, 0
|
||||||
|
br i1 %64, label %do.body24, label %do.end33
|
||||||
|
|
||||||
|
do.end33: ; preds = %do.body24
|
||||||
|
%65 = bitcast float* %lsr.iv27 to i1*
|
||||||
|
%66 = bitcast float* %lsr.iv20 to i1*
|
||||||
|
%67 = bitcast float* %lsr.iv13 to i1*
|
||||||
|
%68 = bitcast float* %lsr.iv6 to i1*
|
||||||
|
%69 = extractelement <4 x float> %56, i32 0
|
||||||
|
%70 = extractelement <4 x float> %56, i32 1
|
||||||
|
%add34 = fadd fast float %69, %70
|
||||||
|
%71 = extractelement <4 x float> %56, i32 2
|
||||||
|
%add35 = fadd fast float %add34, %71
|
||||||
|
%72 = extractelement <4 x float> %56, i32 3
|
||||||
|
%add36 = fadd fast float %add35, %72
|
||||||
|
%arrayidx37 = getelementptr inbounds float, float* %pOut, i32 %k.0202
|
||||||
|
store float %add36, float* %arrayidx37, align 4
|
||||||
|
%73 = extractelement <4 x float> %58, i32 0
|
||||||
|
%74 = extractelement <4 x float> %58, i32 1
|
||||||
|
%add38 = fadd fast float %73, %74
|
||||||
|
%75 = extractelement <4 x float> %58, i32 2
|
||||||
|
%add39 = fadd fast float %add38, %75
|
||||||
|
%76 = extractelement <4 x float> %58, i32 3
|
||||||
|
%add40 = fadd fast float %add39, %76
|
||||||
|
%arrayidx42 = getelementptr inbounds float, float* %pOut, i32 %add9
|
||||||
|
store float %add40, float* %arrayidx42, align 4
|
||||||
|
%77 = extractelement <4 x float> %60, i32 0
|
||||||
|
%78 = extractelement <4 x float> %60, i32 1
|
||||||
|
%add43 = fadd fast float %77, %78
|
||||||
|
%79 = extractelement <4 x float> %60, i32 2
|
||||||
|
%add44 = fadd fast float %add43, %79
|
||||||
|
%80 = extractelement <4 x float> %60, i32 3
|
||||||
|
%add45 = fadd fast float %add44, %80
|
||||||
|
%arrayidx47 = getelementptr inbounds float, float* %pOut, i32 %add12
|
||||||
|
store float %add45, float* %arrayidx47, align 4
|
||||||
|
%81 = extractelement <4 x float> %62, i32 0
|
||||||
|
%82 = extractelement <4 x float> %62, i32 1
|
||||||
|
%add48 = fadd fast float %81, %82
|
||||||
|
%83 = extractelement <4 x float> %62, i32 2
|
||||||
|
%add49 = fadd fast float %add48, %83
|
||||||
|
%84 = extractelement <4 x float> %62, i32 3
|
||||||
|
%add50 = fadd fast float %add49, %84
|
||||||
|
%arrayidx52 = getelementptr inbounds float, float* %pOut, i32 %add15
|
||||||
|
store float %add50, float* %arrayidx52, align 4
|
||||||
|
%add53 = add i32 %k.0202, 4
|
||||||
|
%scevgep8 = getelementptr i1, i1* %68, i32 %21
|
||||||
|
%85 = bitcast i1* %scevgep8 to float*
|
||||||
|
%scevgep15 = getelementptr i1, i1* %67, i32 %21
|
||||||
|
%86 = bitcast i1* %scevgep15 to float*
|
||||||
|
%scevgep22 = getelementptr i1, i1* %66, i32 %21
|
||||||
|
%87 = bitcast i1* %scevgep22 to float*
|
||||||
|
%scevgep29 = getelementptr i1, i1* %65, i32 %21
|
||||||
|
%88 = bitcast i1* %scevgep29 to float*
|
||||||
|
%cmp5 = icmp ult i32 %add53, %sub4
|
||||||
|
br i1 %cmp5, label %for.body, label %for.cond54.preheader
|
||||||
|
|
||||||
|
for.body56: ; preds = %for.body56.preheader, %do.end66
|
||||||
|
%k.1200 = phi i32 [ %inc, %do.end66 ], [ %k.0.lcssa, %for.body56.preheader ]
|
||||||
|
%mul57 = mul i32 %k.1200, %0
|
||||||
|
%arrayidx58 = getelementptr inbounds float, float* %2, i32 %mul57
|
||||||
|
call void @llvm.set.loop.iterations.i32(i32 %38)
|
||||||
|
br label %do.body59
|
||||||
|
|
||||||
|
do.body59: ; preds = %do.body59, %for.body56
|
||||||
|
%count.2 = phi i32 [ %0, %for.body56 ], [ %94, %do.body59 ]
|
||||||
|
%pInT.2 = phi float* [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ]
|
||||||
|
%pCos0.1 = phi float* [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ]
|
||||||
|
%sumVec.1 = phi <4 x float> [ zeroinitializer, %for.body56 ], [ %93, %do.body59 ]
|
||||||
|
%89 = phi i32 [ %38, %for.body56 ], [ %95, %do.body59 ]
|
||||||
|
%pInT.21 = bitcast float* %pInT.2 to <4 x float>*
|
||||||
|
%pCos0.12 = bitcast float* %pCos0.1 to <4 x float>*
|
||||||
|
%90 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.2)
|
||||||
|
%91 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.21, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
|
||||||
|
%92 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pCos0.12, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
|
||||||
|
%93 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %91, <4 x float> %92, <4 x float> %sumVec.1, <4 x i1> %90)
|
||||||
|
%add.ptr61 = getelementptr inbounds float, float* %pInT.2, i32 4
|
||||||
|
%add.ptr62 = getelementptr inbounds float, float* %pCos0.1, i32 4
|
||||||
|
%94 = add i32 %count.2, -4
|
||||||
|
%95 = call i32 @llvm.loop.decrement.reg.i32(i32 %89, i32 1)
|
||||||
|
%96 = icmp ne i32 %95, 0
|
||||||
|
br i1 %96, label %do.body59, label %do.end66
|
||||||
|
|
||||||
|
do.end66: ; preds = %do.body59
|
||||||
|
%97 = extractelement <4 x float> %93, i32 0
|
||||||
|
%98 = extractelement <4 x float> %93, i32 1
|
||||||
|
%add67 = fadd fast float %97, %98
|
||||||
|
%99 = extractelement <4 x float> %93, i32 2
|
||||||
|
%add68 = fadd fast float %add67, %99
|
||||||
|
%100 = extractelement <4 x float> %93, i32 3
|
||||||
|
%add69 = fadd fast float %add68, %100
|
||||||
|
%arrayidx70 = getelementptr inbounds float, float* %pOut, i32 %k.1200
|
||||||
|
store float %add69, float* %arrayidx70, align 4
|
||||||
|
%inc = add nuw i32 %k.1200, 1
|
||||||
|
%exitcond.not = icmp eq i32 %inc, %1
|
||||||
|
br i1 %exitcond.not, label %for.end72, label %for.body56
|
||||||
|
|
||||||
|
for.end72: ; preds = %do.end66, %for.cond54.preheader
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
|
||||||
|
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
|
||||||
|
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
|
||||||
|
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #3
|
||||||
|
declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1
|
||||||
|
declare void @llvm.set.loop.iterations.i32(i32) #4
|
||||||
|
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test
|
||||||
|
alignment: 4
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
hasWinCFI: false
|
||||||
|
registers: []
|
||||||
|
liveins:
|
||||||
|
- { reg: '$r0', virtual-reg: '' }
|
||||||
|
- { reg: '$r1', virtual-reg: '' }
|
||||||
|
- { reg: '$r2', virtual-reg: '' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 112
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 8
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 0
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
localFrameSize: 0
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack:
|
||||||
|
- { id: 0, name: '', type: spill-slot, offset: -76, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 1, name: '', type: spill-slot, offset: -80, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 2, name: '', type: spill-slot, offset: -84, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 3, name: '', type: spill-slot, offset: -88, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 4, name: '', type: spill-slot, offset: -92, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 5, name: '', type: spill-slot, offset: -96, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 6, name: '', type: spill-slot, offset: -100, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 7, name: '', type: spill-slot, offset: -104, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 8, name: '', type: spill-slot, offset: -108, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 9, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 10, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 11, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 12, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 13, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 14, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 15, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 16, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 17, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4,
|
||||||
|
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 18, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8,
|
||||||
|
stack-id: default, callee-saved-register: '$d11', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 19, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8,
|
||||||
|
stack-id: default, callee-saved-register: '$d10', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 20, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8,
|
||||||
|
stack-id: default, callee-saved-register: '$d9', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
- { id: 21, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8,
|
||||||
|
stack-id: default, callee-saved-register: '$d8', callee-saved-restored: true,
|
||||||
|
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||||
|
callSites: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
successors: %bb.1(0x80000000)
|
||||||
|
liveins: $r0, $r1, $r2, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11
|
||||||
|
|
||||||
|
$sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr
|
||||||
|
frame-setup CFI_INSTRUCTION def_cfa_offset 36
|
||||||
|
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r11, -8
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r10, -12
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r9, -16
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r8, -20
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r7, -24
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r6, -28
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r5, -32
|
||||||
|
frame-setup CFI_INSTRUCTION offset $r4, -36
|
||||||
|
$sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
|
||||||
|
frame-setup CFI_INSTRUCTION def_cfa_offset 40
|
||||||
|
$sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d8, killed $d9, killed $d10, killed $d11
|
||||||
|
frame-setup CFI_INSTRUCTION def_cfa_offset 72
|
||||||
|
frame-setup CFI_INSTRUCTION offset $d11, -48
|
||||||
|
frame-setup CFI_INSTRUCTION offset $d10, -56
|
||||||
|
frame-setup CFI_INSTRUCTION offset $d9, -64
|
||||||
|
frame-setup CFI_INSTRUCTION offset $d8, -72
|
||||||
|
$sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg
|
||||||
|
frame-setup CFI_INSTRUCTION def_cfa_offset 112
|
||||||
|
renamable $r4 = tLDRi renamable $r0, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.NumInputs)
|
||||||
|
$r5 = tMOVr killed $r1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r11 = t2LDRi12 renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.pDCTCoefs34)
|
||||||
|
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
|
||||||
|
$r1 = tMOVr $r4, 14 /* CC::al */, $noreg
|
||||||
|
tCMPi8 renamable $r4, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
t2IT 10, 8, implicit-def $itstate
|
||||||
|
renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate
|
||||||
|
renamable $r1, dead $cpsr = tSUBrr renamable $r4, killed renamable $r1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 3, 14 /* CC::al */, $noreg
|
||||||
|
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r3 = tLDRi killed renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.NumFilters)
|
||||||
|
$r0 = tMOVr $r4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
|
||||||
|
$r1 = tMOVr $r5, 14 /* CC::al */, $noreg
|
||||||
|
t2DoLoopStart renamable $lr
|
||||||
|
|
||||||
|
bb.1.do.body (align 4):
|
||||||
|
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||||
|
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r11
|
||||||
|
|
||||||
|
renamable $vpr = MVE_VCTP32 renamable $r0, 0, $noreg
|
||||||
|
MVE_VPST 4, implicit $vpr
|
||||||
|
renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.pInT.033, align 4)
|
||||||
|
renamable $q0 = MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, undef renamable $q0
|
||||||
|
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||||
|
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
|
||||||
|
tB %bb.2, 14 /* CC::al */, $noreg
|
||||||
|
|
||||||
|
bb.2.do.end:
|
||||||
|
successors: %bb.3(0x40000000), %bb.7(0x40000000)
|
||||||
|
liveins: $q0, $r2, $r3, $r4, $r5, $r11
|
||||||
|
|
||||||
|
renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r0, dead $cpsr = tSUBi3 renamable $r3, 4, 14 /* CC::al */, $noreg
|
||||||
|
tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store 4 into %stack.8)
|
||||||
|
renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
|
||||||
|
tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1)
|
||||||
|
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
|
||||||
|
renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.2)
|
||||||
|
tCMPi8 killed renamable $r0, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
|
||||||
|
VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pOut)
|
||||||
|
t2Bcc %bb.7, 3 /* CC::lo */, killed $cpsr
|
||||||
|
|
||||||
|
bb.3.for.body.lr.ph:
|
||||||
|
successors: %bb.4(0x80000000)
|
||||||
|
liveins: $r0, $r2, $r4, $r5, $r11
|
||||||
|
|
||||||
|
renamable $r6 = t2ADDri renamable $r5, 16, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r1, dead $cpsr = tSUBi3 renamable $r4, 4, 14 /* CC::al */, $noreg
|
||||||
|
tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store 4 into %stack.5)
|
||||||
|
renamable $r6, dead $cpsr = tLSLri renamable $r4, 4, 14 /* CC::al */, $noreg
|
||||||
|
tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
tSTRspi killed renamable $r6, $sp, 3, 14 /* CC::al */, $noreg :: (store 4 into %stack.6)
|
||||||
|
t2IT 10, 8, implicit-def $itstate
|
||||||
|
renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate
|
||||||
|
renamable $r7 = t2ADDrs renamable $r4, renamable $r4, 10, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r1, dead $cpsr = tMVN killed renamable $r1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r1 = tADDhirr killed renamable $r1, renamable $r4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r12 = t2ADDrs renamable $r11, renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r3 = t2ADDrs renamable $r11, renamable $r4, 26, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $lr = t2ADDrs renamable $r11, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r7 = t2ADDrs renamable $r11, renamable $r4, 34, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r1 = nuw nsw t2ADDrs renamable $r0, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r6 = t2ADDri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r12 = t2ADDri killed renamable $r3, 16, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
tSTRspi killed renamable $r1, $sp, 2, 14 /* CC::al */, $noreg :: (store 4 into %stack.7)
|
||||||
|
renamable $r1 = t2ADDri killed renamable $lr, 16, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r10 = t2ADDri killed renamable $r7, 16, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
tSTRspi renamable $r4, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2)
|
||||||
|
t2STRDi8 $r11, $r5, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3)
|
||||||
|
|
||||||
|
bb.4.for.body (align 4):
|
||||||
|
successors: %bb.5(0x80000000)
|
||||||
|
liveins: $r0, $r1, $r2, $r4, $r5, $r6, $r10, $r11, $r12
|
||||||
|
|
||||||
|
renamable $r3 = t2MUL renamable $r0, renamable $r4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r7, dead $cpsr = nuw nsw tADDi3 renamable $r0, 1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r8 = nuw nsw t2ADDri renamable $r0, 2, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
tSTRspi renamable $r7, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0)
|
||||||
|
renamable $r9 = t2ADDri renamable $r0, 3, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r7, dead $cpsr = tMUL renamable $r4, killed renamable $r7, 14 /* CC::al */, $noreg
|
||||||
|
renamable $q0 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg :: (load 16 from %ir.39, align 4)
|
||||||
|
renamable $r3 = t2ADDrs renamable $r11, killed renamable $r3, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r5 = t2MUL renamable $r8, renamable $r4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r4 = t2MUL renamable $r9, killed renamable $r4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r7 = t2ADDrs renamable $r11, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r5 = t2ADDrs renamable $r11, killed renamable $r5, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r4 = t2ADDrs killed renamable $r11, killed renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $q1 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %ir.41, align 4)
|
||||||
|
renamable $q3 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q3
|
||||||
|
renamable $q1 = MVE_VLDRWU32 killed renamable $r7, 0, 0, $noreg :: (load 16 from %ir.44, align 4)
|
||||||
|
renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q2
|
||||||
|
renamable $q1 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg :: (load 16 from %ir.47, align 4)
|
||||||
|
renamable $q1 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
|
||||||
|
renamable $q4 = MVE_VLDRWU32 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.50, align 4)
|
||||||
|
renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q4, killed renamable $q0, 0, $noreg, undef renamable $q0
|
||||||
|
renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.7)
|
||||||
|
$r3 = tMOVr $r10, 14 /* CC::al */, $noreg
|
||||||
|
$r5 = tMOVr $r1, 14 /* CC::al */, $noreg
|
||||||
|
$r4 = tMOVr $r12, 14 /* CC::al */, $noreg
|
||||||
|
t2DoLoopStart renamable $lr
|
||||||
|
$r7 = tMOVr $r6, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r11 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %stack.5)
|
||||||
|
|
||||||
|
bb.5.do.body24 (align 4):
|
||||||
|
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
|
||||||
|
liveins: $lr, $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $r12
|
||||||
|
|
||||||
|
renamable $r11, renamable $q4 = MVE_VLDRWU32_post killed renamable $r11, 16, 0, $noreg :: (load 16 from %ir.lsr.iv4, align 4)
|
||||||
|
renamable $r7, renamable $q5 = MVE_VLDRWU32_post killed renamable $r7, 16, 0, $noreg :: (load 16 from %ir.lsr.iv911, align 4)
|
||||||
|
renamable $q3 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q3, renamable $q4, killed renamable $q5, 0, $noreg
|
||||||
|
renamable $r4, renamable $q5 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1618, align 4)
|
||||||
|
renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q2, renamable $q4, killed renamable $q5, 0, $noreg
|
||||||
|
renamable $r5, renamable $q5 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load 16 from %ir.lsr.iv2325, align 4)
|
||||||
|
renamable $q1 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q1, renamable $q4, killed renamable $q5, 0, $noreg
|
||||||
|
renamable $r3, renamable $q5 = MVE_VLDRWU32_post killed renamable $r3, 16, 0, $noreg :: (load 16 from %ir.lsr.iv3032, align 4)
|
||||||
|
renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q4, killed renamable $q5, 0, $noreg
|
||||||
|
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||||
|
t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr
|
||||||
|
tB %bb.6, 14 /* CC::al */, $noreg
|
||||||
|
|
||||||
|
bb.6.do.end33:
|
||||||
|
successors: %bb.4(0x7c000000), %bb.7(0x04000000)
|
||||||
|
liveins: $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r6, $r8, $r9, $r10, $r12
|
||||||
|
|
||||||
|
renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3
|
||||||
|
renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2
|
||||||
|
renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0)
|
||||||
|
renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r3 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r7 = t2ADDrs renamable $r2, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1
|
||||||
|
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
|
||||||
|
VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx37)
|
||||||
|
VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx42)
|
||||||
|
renamable $r3 = t2ADDrs renamable $r2, killed renamable $r8, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $r7 = t2ADDrs renamable $r2, killed renamable $r9, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
VSTRS killed renamable $s4, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx47)
|
||||||
|
VSTRS killed renamable $s0, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx52)
|
||||||
|
$r11, $r5 = t2LDRDi8 $sp, 20, 14 /* CC::al */, $noreg :: (load 4 from %stack.4), (load 4 from %stack.3)
|
||||||
|
renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r7 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg :: (load 4 from %stack.6)
|
||||||
|
renamable $r3 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1)
|
||||||
|
renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2)
|
||||||
|
renamable $r6 = tADDhirr killed renamable $r6, renamable $r7, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r12 = tADDhirr killed renamable $r12, renamable $r7, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r1 = tADDhirr killed renamable $r1, renamable $r7, 14 /* CC::al */, $noreg
|
||||||
|
tCMPr renamable $r0, killed renamable $r3, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
renamable $r10 = tADDhirr killed renamable $r10, killed renamable $r7, 14 /* CC::al */, $noreg
|
||||||
|
t2Bcc %bb.4, 3 /* CC::lo */, killed $cpsr
|
||||||
|
|
||||||
|
bb.7.for.cond54.preheader:
|
||||||
|
successors: %bb.8(0x40000000), %bb.12(0x40000000)
|
||||||
|
liveins: $r0, $r2, $r4, $r5, $r11
|
||||||
|
|
||||||
|
renamable $r12 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8)
|
||||||
|
tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
tBcc %bb.12, 2 /* CC::hs */, killed $cpsr
|
||||||
|
|
||||||
|
bb.8.for.body56.preheader:
|
||||||
|
successors: %bb.9(0x80000000)
|
||||||
|
liveins: $r0, $r2, $r4, $r5, $r11, $r12
|
||||||
|
|
||||||
|
$r1 = tMOVr $r4, 14 /* CC::al */, $noreg
|
||||||
|
tCMPi8 renamable $r4, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
t2IT 10, 8, implicit-def $itstate
|
||||||
|
renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate
|
||||||
|
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r1, dead $cpsr = tSUBrr renamable $r4, killed renamable $r1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 3, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
|
||||||
|
bb.9.for.body56 (align 4):
|
||||||
|
successors: %bb.13(0x80000000)
|
||||||
|
liveins: $r0, $r2, $r3, $r4, $r5, $r11, $r12
|
||||||
|
|
||||||
|
renamable $r1 = t2MUL renamable $r0, renamable $r4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
|
||||||
|
renamable $r1 = t2ADDrs renamable $r11, killed renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
$r6 = tMOVr $r4, 14 /* CC::al */, $noreg
|
||||||
|
$r7 = tMOVr $r5, 14 /* CC::al */, $noreg
|
||||||
|
$lr = tMOVr $r3, 14 /* CC::al */, $noreg
|
||||||
|
t2DoLoopStart renamable $r3
|
||||||
|
|
||||||
|
bb.13:
|
||||||
|
successors: %bb.10(0x80000000)
|
||||||
|
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
|
||||||
|
|
||||||
|
bb.10.do.body59 (align 4):
|
||||||
|
successors: %bb.10(0x7c000000), %bb.11(0x04000000)
|
||||||
|
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
|
||||||
|
|
||||||
|
renamable $vpr = MVE_VCTP32 renamable $r6, 0, $noreg
|
||||||
|
MVE_VPST 2, implicit $vpr
|
||||||
|
renamable $r7, renamable $q1 = MVE_VLDRWU32_post killed renamable $r7, 16, 1, renamable $vpr :: (load 16 from %ir.pInT.21, align 4)
|
||||||
|
renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.pCos0.12, align 4)
|
||||||
|
renamable $q0 = MVE_VFMAf32 killed renamable $q0, killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr
|
||||||
|
renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14 /* CC::al */, $noreg
|
||||||
|
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||||
|
t2LoopEnd renamable $lr, %bb.10, implicit-def dead $cpsr
|
||||||
|
tB %bb.11, 14 /* CC::al */, $noreg
|
||||||
|
|
||||||
|
bb.11.do.end66:
|
||||||
|
successors: %bb.12(0x04000000), %bb.9(0x7c000000)
|
||||||
|
liveins: $q0, $r0, $r2, $r3, $r4, $r5, $r11, $r12
|
||||||
|
|
||||||
|
renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r1 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
|
||||||
|
renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
|
||||||
|
renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
|
||||||
|
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
|
||||||
|
tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||||
|
VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx70)
|
||||||
|
tBcc %bb.9, 1 /* CC::ne */, killed $cpsr
|
||||||
|
|
||||||
|
bb.12.for.end72:
|
||||||
|
$sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg
|
||||||
|
$sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11
|
||||||
|
$sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
|
||||||
|
$sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc
|
||||||
|
|
||||||
|
...
|
Loading…
Reference in New Issue
Block a user