mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[AMDGPU] Fix ds combine with subregs
Differential Revision: https://reviews.llvm.org/D52522
llvm-svn: 343047
This commit is contained in:
parent
f8c4c3a58a
commit
011d08c69a
@ -514,6 +514,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
|
||||
DebugLoc DL = CI.I->getDebugLoc();
|
||||
|
||||
unsigned BaseReg = AddrReg->getReg();
|
||||
unsigned BaseSubReg = AddrReg->getSubReg();
|
||||
unsigned BaseRegFlags = 0;
|
||||
if (CI.BaseOff) {
|
||||
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
|
||||
@ -525,15 +526,16 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
|
||||
|
||||
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
|
||||
.addReg(ImmReg)
|
||||
.addReg(AddrReg->getReg());
|
||||
.addReg(AddrReg->getReg(), 0, BaseSubReg);
|
||||
BaseSubReg = 0;
|
||||
}
|
||||
|
||||
MachineInstrBuilder Read2 = BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
|
||||
.addReg(BaseReg, BaseRegFlags) // addr
|
||||
.addImm(NewOffset0) // offset0
|
||||
.addImm(NewOffset1) // offset1
|
||||
.addImm(0) // gds
|
||||
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
|
||||
.addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
|
||||
.addImm(NewOffset0) // offset0
|
||||
.addImm(NewOffset1) // offset1
|
||||
.addImm(0) // gds
|
||||
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
|
||||
|
||||
(void)Read2;
|
||||
|
||||
@ -601,6 +603,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
|
||||
DebugLoc DL = CI.I->getDebugLoc();
|
||||
|
||||
unsigned BaseReg = AddrReg->getReg();
|
||||
unsigned BaseSubReg = AddrReg->getSubReg();
|
||||
unsigned BaseRegFlags = 0;
|
||||
if (CI.BaseOff) {
|
||||
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
|
||||
@ -612,17 +615,18 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
|
||||
|
||||
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
|
||||
.addReg(ImmReg)
|
||||
.addReg(AddrReg->getReg());
|
||||
.addReg(AddrReg->getReg(), 0, BaseSubReg);
|
||||
BaseSubReg = 0;
|
||||
}
|
||||
|
||||
MachineInstrBuilder Write2 = BuildMI(*MBB, CI.Paired, DL, Write2Desc)
|
||||
.addReg(BaseReg, BaseRegFlags) // addr
|
||||
.add(*Data0) // data0
|
||||
.add(*Data1) // data1
|
||||
.addImm(NewOffset0) // offset0
|
||||
.addImm(NewOffset1) // offset1
|
||||
.addImm(0) // gds
|
||||
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
|
||||
.addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
|
||||
.add(*Data0) // data0
|
||||
.add(*Data1) // data1
|
||||
.addImm(NewOffset0) // offset0
|
||||
.addImm(NewOffset1) // offset1
|
||||
.addImm(0) // gds
|
||||
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
|
||||
|
||||
moveInstsAfter(Write2, CI.InstsToMove);
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
# $vcc, which is used in _e32); this ensures that $vcc is not inadvertently
|
||||
# clobbered.
|
||||
|
||||
# GCN-LABEL: name: kernel
|
||||
# GCN-LABEL: name: ds_combine_base_offset{{$}}
|
||||
|
||||
# VI: V_ADD_I32_e64 %6, %0,
|
||||
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
|
||||
@ -21,7 +21,37 @@
|
||||
--- |
|
||||
@0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4
|
||||
|
||||
define amdgpu_kernel void @kernel() {
|
||||
define amdgpu_kernel void @ds_combine_base_offset() {
|
||||
bb.0:
|
||||
br label %bb2
|
||||
|
||||
bb1:
|
||||
ret void
|
||||
|
||||
bb2:
|
||||
%tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
|
||||
%tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
|
||||
%tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
|
||||
%tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
|
||||
br label %bb1
|
||||
}
|
||||
|
||||
; IR stub for the MIR function of the same name. It performs no memory
; accesses itself; the four GEPs off @0 (element indices 0, 8, 16 and 24) only
; exist to define %tmp..%tmp3, which the MIR body's memory operands refer to
; as %ir.tmp..%ir.tmp3.
define amdgpu_kernel void @ds_combine_base_offset_subreg() {
|
||||
bb.0:
|
||||
br label %bb2
|
||||
|
||||
bb1:
|
||||
ret void
|
||||
|
||||
bb2:
|
||||
%tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
|
||||
%tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
|
||||
%tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
|
||||
%tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
|
||||
br label %bb1
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @ds_combine_subreg() {
|
||||
bb.0:
|
||||
br label %bb2
|
||||
|
||||
@ -36,7 +66,7 @@
|
||||
br label %bb1
|
||||
}
|
||||
---
|
||||
name: kernel
|
||||
name: ds_combine_base_offset
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
@ -58,3 +88,69 @@ body: |
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}
|
||||
|
||||
# VI: V_ADD_I32_e64 %6, %0.sub0,
|
||||
# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
|
||||
# VI: V_ADD_I32_e64 %10, %3.sub0,
|
||||
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,
|
||||
|
||||
# GFX9: V_ADD_U32_e64 %6, %0.sub0,
|
||||
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
|
||||
# GFX9: V_ADD_U32_e64 %9, %3.sub0,
|
||||
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
|
||||
# Input for the base-offset case with subregister operands: every DS address
# and data operand below is a .sub0 index into a vreg_64 (%0 or %3).
# The two stores sit at offsets 1024 and 1056, the two loads at 1088 and 1120,
# i.e. each pair is 32 bytes apart. The check lines above expect each pair to
# become an add that reads the .sub0 address, immediately followed by one
# write2/read2 using the add result with offsets 0, 8.
---
|
||||
name: ds_combine_base_offset_subreg
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vreg_64 = IMPLICIT_DEF
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
S_ENDPGM
|
||||
|
||||
bb.2:
|
||||
%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
|
||||
%2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
|
||||
V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
|
||||
DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
|
||||
undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
|
||||
DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
|
||||
%4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
|
||||
%5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
|
||||
$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: ds_combine_subreg{{$}}
|
||||
|
||||
# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
|
||||
# VI: DS_READ2_B32 %3.sub0, 0, 8,
|
||||
|
||||
# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
|
||||
# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
|
||||
# Input for the plain (no base add) case with subregister operands: the two
# stores use address %0.sub0 at offsets 0 and 32, the two loads use address
# %3.sub0 at offsets 0 and 32. The check lines above expect one write2 and one
# read2 with offsets 0, 8 whose address operand still carries the .sub0 index,
# and they do not expect any add in front of them.
---
|
||||
name: ds_combine_subreg
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vreg_64 = IMPLICIT_DEF
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
S_ENDPGM
|
||||
|
||||
bb.2:
|
||||
%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
|
||||
%2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
|
||||
V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
|
||||
DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
|
||||
undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
|
||||
DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
|
||||
%4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
|
||||
%5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
|
||||
$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.1
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user