mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[VirtRegRewriter] Insert missing killed flags when tracking subregister liveness
VirtRegRewriter may sometimes fail to correctly apply the kill flag where necessary, which causes unnecessary code gen on PowerPC. This patch fixes the way masks for defined lanes are computed and the way the mask for used lanes is computed. Contact albion.fung@ibm.com instead of author for problems related to this commit. Differential Revision: https://reviews.llvm.org/D92405
This commit is contained in:
parent
a40e17a3fc
commit
8c45c9b8dd
@ -702,9 +702,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
|
||||
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
|
||||
// Keep track of regunit ranges.
|
||||
SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
|
||||
// Keep track of subregister ranges.
|
||||
SmallVector<std::pair<const LiveInterval::SubRange*,
|
||||
LiveRange::const_iterator>, 4> SRs;
|
||||
|
||||
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
|
||||
Register Reg = Register::index2VirtReg(i);
|
||||
@ -724,14 +721,6 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
|
||||
continue;
|
||||
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
|
||||
}
|
||||
|
||||
if (MRI->subRegLivenessEnabled()) {
|
||||
SRs.clear();
|
||||
for (const LiveInterval::SubRange &SR : LI.subranges()) {
|
||||
SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
|
||||
}
|
||||
}
|
||||
|
||||
// Every instruction that kills Reg corresponds to a segment range end
|
||||
// point.
|
||||
for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
|
||||
@ -776,20 +765,18 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
|
||||
// are actually never written by %2. After assignment the <kill>
|
||||
// flag at the read instruction is invalid.
|
||||
LaneBitmask DefinedLanesMask;
|
||||
if (!SRs.empty()) {
|
||||
if (LI.hasSubRanges()) {
|
||||
// Compute a mask of lanes that are defined.
|
||||
DefinedLanesMask = LaneBitmask::getNone();
|
||||
for (auto &SRP : SRs) {
|
||||
const LiveInterval::SubRange &SR = *SRP.first;
|
||||
LiveRange::const_iterator &I = SRP.second;
|
||||
if (I == SR.end())
|
||||
continue;
|
||||
I = SR.advanceTo(I, RI->end);
|
||||
if (I == SR.end() || I->start >= RI->end)
|
||||
continue;
|
||||
// I is overlapping RI
|
||||
DefinedLanesMask |= SR.LaneMask;
|
||||
}
|
||||
for (const LiveInterval::SubRange &SR : LI.subranges())
|
||||
for (const LiveRange::Segment &Segment : SR.segments) {
|
||||
if (Segment.start >= RI->end)
|
||||
break;
|
||||
if (Segment.end == RI->end) {
|
||||
DefinedLanesMask |= SR.LaneMask;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else
|
||||
DefinedLanesMask = LaneBitmask::getAll();
|
||||
|
||||
@ -799,7 +786,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
|
||||
continue;
|
||||
if (MO.isUse()) {
|
||||
// Reading any undefined lanes?
|
||||
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
|
||||
unsigned SubReg = MO.getSubReg();
|
||||
LaneBitmask UseMask = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
|
||||
: MRI->getMaxLaneMaskForVReg(Reg);
|
||||
if ((UseMask & ~DefinedLanesMask).any())
|
||||
goto CancelKill;
|
||||
} else if (MO.getSubReg() == 0) {
|
||||
|
@ -7303,8 +7303,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace
|
||||
; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
|
||||
; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
|
||||
; GCN-NOHSA-SI-NEXT: buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
|
||||
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1)
|
||||
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v21, s0, v3
|
||||
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s0, v3
|
||||
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v5
|
||||
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, s0, v5
|
||||
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v6
|
||||
@ -7345,7 +7344,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v1
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v1
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v1
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, v21
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v23
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1
|
||||
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1
|
||||
|
@ -22,7 +22,7 @@
|
||||
# GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF
|
||||
# GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr5, $vcc, implicit $exec
|
||||
# GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr4, killed $vcc, implicit $exec
|
||||
# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 $vgpr4_vgpr5, $vgpr0_vgpr1, implicit $exec
|
||||
# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 killed $vgpr4_vgpr5, killed $vgpr0_vgpr1, implicit $exec
|
||||
---
|
||||
name: vgpr64_mixed_use
|
||||
tracksRegLiveness: true
|
||||
|
@ -367,7 +367,7 @@ body: |
|
||||
|
||||
# GCN-LABEL: vgpr_lo16_sub{{$}}
|
||||
# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
|
||||
# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16
|
||||
# GCN: renamable $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16
|
||||
---
|
||||
name: vgpr_lo16_sub
|
||||
tracksRegLiveness: true
|
||||
@ -404,7 +404,7 @@ body: |
|
||||
|
||||
# GCN-LABEL: vgpr_hi16_sub{{$}}
|
||||
# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
|
||||
# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16
|
||||
# GCN: renamable $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16
|
||||
---
|
||||
name: vgpr_hi16_sub
|
||||
tracksRegLiveness: true
|
||||
@ -441,7 +441,7 @@ body: |
|
||||
|
||||
# GCN-LABEL: sgpr_lo16_sub{{$}}
|
||||
# GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc
|
||||
# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16
|
||||
# GCN: renamable $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16
|
||||
---
|
||||
name: sgpr_lo16_sub
|
||||
tracksRegLiveness: true
|
||||
@ -498,7 +498,7 @@ body: |
|
||||
# Test that bank of subreg is considered during scavenging.
|
||||
# If handled incorrectly an infinite loop occurs.
|
||||
# GCN-LABEL: s0_vs_s15_16_17_sub1{{$}}
|
||||
# GCN: S_AND_B32 renamable $sgpr13, $sgpr0,
|
||||
# GCN: S_AND_B32 killed renamable $sgpr13, $sgpr0,
|
||||
---
|
||||
name: s0_vs_s15_16_17_sub1
|
||||
tracksRegLiveness: true
|
||||
@ -531,7 +531,7 @@ body: |
|
||||
# GCN: $vgpr40_vgpr41_vgpr42_vgpr43 = IMPLICIT_DEF
|
||||
# GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF
|
||||
# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, $vgpr11_vgpr12, 0, killed $vgpr16_vgpr17, 0, 0, implicit $mode, implicit $exec
|
||||
# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec
|
||||
# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, killed $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec
|
||||
---
|
||||
name: vgpr_sub_dependence
|
||||
tracksRegLiveness: true
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}}
|
||||
; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
|
||||
; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x{{[0-9a-f]+}}{{$}}
|
||||
; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}}
|
||||
; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
|
||||
|
||||
; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill
|
||||
|
@ -19,7 +19,7 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK: renamable $sgpr0 = IMPLICIT_DEF
|
||||
; CHECK: renamable $sgpr1 = IMPLICIT_DEF
|
||||
; CHECK: $sgpr104 = S_AND_B32 renamable $sgpr0, renamable $sgpr1, implicit-def $scc
|
||||
; CHECK: $sgpr104 = S_AND_B32 killed renamable $sgpr0, renamable $sgpr1, implicit-def $scc
|
||||
; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; CHECK: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
|
||||
; CHECK: renamable $sgpr0 = IMPLICIT_DEF
|
||||
@ -27,7 +27,7 @@ body: |
|
||||
; CHECK: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store 8 into %stack.0, align 4, addrspace 5)
|
||||
; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; CHECK: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load 8 from %stack.0, align 4, addrspace 5)
|
||||
; CHECK: $sgpr105 = S_AND_B32 renamable $sgpr1, renamable $sgpr1, implicit-def $scc
|
||||
; CHECK: $sgpr105 = S_AND_B32 killed renamable $sgpr1, renamable $sgpr1, implicit-def $scc
|
||||
; CHECK: S_NOP 0, implicit $sgpr104, implicit $sgpr105
|
||||
%0:sreg_64 = COPY $sgpr0_sgpr1
|
||||
%0.sub0:sreg_64 = IMPLICIT_DEF
|
||||
|
@ -39,11 +39,11 @@ body: |
|
||||
# CHECK: S_NOP 0
|
||||
# CHECK: S_NOP 0, implicit renamable [[REG0]]
|
||||
# CHECK: S_NOP 0, implicit renamable [[REG1]]
|
||||
# CHECK: $sgpr0 = COPY renamable [[REG0]]
|
||||
# CHECK: $sgpr0 = COPY killed renamable [[REG0]]
|
||||
# CHECK: $sgpr2 = COPY renamable [[REG1]]
|
||||
# CHECK: S_NOP
|
||||
# CHECK: S_NOP 0, implicit renamable $sgpr0
|
||||
# CHECK: S_NOP 0, implicit renamable $sgpr2
|
||||
# CHECK: S_NOP 0, implicit killed renamable $sgpr2
|
||||
name: func1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
|
@ -16,8 +16,8 @@
|
||||
# CHECK: S_NOP 0, implicit-def renamable $sgpr3
|
||||
# CHECK: S_NOP 0, implicit-def renamable $sgpr1
|
||||
# CHECK: S_NOP 0, implicit-def renamable $sgpr2
|
||||
# CHECK: S_NOP 0, implicit renamable $sgpr0, implicit renamable $sgpr3
|
||||
# CHECK: S_NOP 0, implicit renamable $sgpr1, implicit renamable $sgpr2
|
||||
# CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit renamable $sgpr3
|
||||
# CHECK: S_NOP 0, implicit killed renamable $sgpr1, implicit renamable $sgpr2
|
||||
name: func0
|
||||
body: |
|
||||
bb.0:
|
||||
|
@ -40,24 +40,24 @@ body: |
|
||||
; CHECK: renamable $sgpr13 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $sgpr14 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $sgpr15 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $vgpr5_vgpr6 = COPY renamable $sgpr0_sgpr1
|
||||
; CHECK: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1
|
||||
; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6)
|
||||
; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $sgpr0 = S_MOV_B32 1200
|
||||
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6)
|
||||
; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: KILL renamable $sgpr0, renamable $sgpr1
|
||||
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
|
||||
; CHECK: renamable $sgpr0 = S_MOV_B32 1264
|
||||
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6)
|
||||
; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: KILL renamable $sgpr0, renamable $sgpr1
|
||||
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
|
||||
; CHECK: renamable $sgpr0 = S_MOV_B32 1328
|
||||
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6)
|
||||
; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: KILL renamable $sgpr0, renamable $sgpr1
|
||||
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
|
||||
; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6)
|
||||
; CHECK: renamable $sgpr0 = S_MOV_B32 1392
|
||||
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
|
||||
@ -66,10 +66,10 @@ body: |
|
||||
; CHECK: renamable $sgpr3 = COPY renamable $sgpr5
|
||||
; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6)
|
||||
; CHECK: renamable $sgpr4 = S_MOV_B32 1520
|
||||
; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
|
||||
; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
@ -100,7 +100,7 @@ body: |
|
||||
; CHECK: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr3, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK: renamable $sgpr0 = nofpexcept V_CMP_GT_F32_e64 0, 1028443341, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
|
||||
; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 0, 0, 1065353216, killed $sgpr0, implicit $exec
|
||||
; CHECK: EXP_DONE 12, killed renamable $vgpr0, renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec
|
||||
; CHECK: EXP_DONE 12, killed renamable $vgpr0, killed renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
undef %1.sub0:sgpr_64 = COPY $sgpr0
|
||||
|
@ -40,7 +40,7 @@ body: |
|
||||
; CHECK: $sgpr4 = COPY $sgpr95
|
||||
; CHECK: $vgpr0 = COPY renamable $vgpr40
|
||||
; CHECK: $vgpr1 = COPY renamable $vgpr41
|
||||
; CHECK: $vgpr2 = COPY renamable $vgpr42
|
||||
; CHECK: $vgpr2 = COPY killed renamable $vgpr42
|
||||
; CHECK: $vgpr3 = KILL undef renamable $vgpr3
|
||||
; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
|
||||
; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
|
40
test/CodeGen/PowerPC/subreg-killed.mir
Normal file
40
test/CodeGen/PowerPC/subreg-killed.mir
Normal file
@ -0,0 +1,40 @@
|
||||
# RUN: llc -mcpu=pwr10 -O3 -ppc-track-subreg-liveness -verify-machineinstrs \
|
||||
# RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \
|
||||
# RUN: -o - %s | FileCheck %s
|
||||
|
||||
# This test case checks that the 'killed' flag is properly added when using
|
||||
# subregisters.
|
||||
|
||||
# CHECK-LABEL: test
|
||||
# CHECK: KILL_PAIR killed
|
||||
# CHECK-NEXT: COPY killed
|
||||
# CHECK-NEXT: KILL_PAIR killed
|
||||
# CHECK-NEXT: COPY killed
|
||||
# CHECK-NEXT: BUILD_UACC killed
|
||||
# CHECK-NEXT: XXMTACC killed
|
||||
# CHECK-NEXT: SPILL_ACC killed
|
||||
|
||||
---
|
||||
name: test
|
||||
tracksRegLiveness: true
|
||||
fixedStack:
|
||||
- { id: 0, size: 8 }
|
||||
stack:
|
||||
- { id: 0, size: 64 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $v2, $v3, $v4, $v5
|
||||
undef %4.sub_vsx1:vsrprc_with_sub_64_in_vfrc = COPY $v5
|
||||
%4.sub_vsx0:vsrprc_with_sub_64_in_vfrc = COPY $v4
|
||||
undef %5.sub_vsx1:vsrprc_with_sub_64_in_vfrc = COPY $v3
|
||||
%5.sub_vsx0:vsrprc_with_sub_64_in_vfrc = COPY $v2
|
||||
%6:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0
|
||||
%5:vsrprc_with_sub_64_in_vfrc = KILL_PAIR %5
|
||||
undef %7.sub_pair0:uaccrc = COPY %5
|
||||
%4:vsrprc_with_sub_64_in_vfrc = KILL_PAIR %4
|
||||
%7.sub_pair1:uaccrc = COPY %4
|
||||
%8:accrc = BUILD_UACC %7
|
||||
%8:accrc = XXMTACC %8
|
||||
SPILL_ACC %8, 0, %stack.0
|
||||
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user